[TOOLS] fix send_bw test for UD.
[mirror/winof/.git] / tools / perftests / user / send_bw / send_bw.c
1 /*
2  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
4  *
5  * This software is available to you under a choice of one of two
6  * licenses.  You may choose to be licensed under the terms of the GNU
7  * General Public License (GPL) Version 2, available from the file
8  * COPYING in the main directory of this source tree, or the
9  * OpenIB.org BSD license below:
10  *
11  *     Redistribution and use in source and binary forms, with or
12  *     without modification, are permitted provided that the following
13  *     conditions are met:
14  *
15  *      - Redistributions of source code must retain the above
16  *        copyright notice, this list of conditions and the following
17  *        disclaimer.
18  *
19  *      - Redistributions in binary form must reproduce the above
20  *        copyright notice, this list of conditions and the following
21  *        disclaimer in the documentation and/or other materials
22  *        provided with the distribution.
23  *
24  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31  * SOFTWARE.
32  *
33  * $Id$
34  */
35
36 #include "getopt.h"
37 #include "get_clock.h"
38
39 #include "perf_defs.h"
40
/* NOTE(review): SIGNAL is not referenced in the visible part of this file. */
#define SIGNAL		1
/*
 * NOTE(review): MAX_INLINE is only mentioned in a TODO in pp_init_ctx();
 * presumably an inline-send size limit in bytes -- confirm before relying on it.
 */
#define MAX_INLINE	400
43
/*
 * Settings for one send_bw run, as selected on the command line.
 */
struct user_parameters {
	const char	*servername;		/* peer to connect to; NULL selects the server role */
	int		connection_type;	/* RC, UC or UD */
	int		mtu;			/* path MTU in bytes; 0 lets pp_init_ctx() choose */
	int		all;			/* run all msg size */
	int		signal_comp;		/* NOTE(review): not used in the visible part of this file */
	int		iters;			/* number of messages to exchange */
	int		tx_depth;		/* send/receive queue depth */
	int		duplex;			/* non-zero for a bidirectional measurement */
	int		use_event;		/* non-zero to wait on CQ events instead of polling */
};
55
56 static int page_size;
57 cycles_t        *tposted;
58 cycles_t        *tcompleted;
59 int post_recv;
60
61
62 void
63 pp_cq_comp_cb(
64         IN              const   ib_cq_handle_t                          h_cq,
65         IN                              void                                            *cq_context )
66 {
67         UNUSED_PARAM( h_cq );
68         UNUSED_PARAM( cq_context);
69         return ;
70 }
71
72
73
74 static struct pingpong_context *pp_init_ctx(unsigned size,int port, struct user_parameters *user_parm)
75 {
76
77         struct pingpong_context *ctx;
78         ib_api_status_t                         ib_status = IB_SUCCESS; 
79         size_t                                  guid_count;
80         ib_net64_t                              *ca_guid_array;
81
82
83         
84         ctx = malloc(sizeof *ctx);
85         if (!ctx)
86                 return NULL;
87
88         ctx->qp = malloc(sizeof (ib_qp_handle_t));
89         if (!ctx->qp) {
90                 perror("malloc");
91                 return NULL;
92         }
93         ctx->qp_attr = malloc(sizeof (ib_qp_attr_t));
94         if (!ctx->qp_attr) {
95                 perror("malloc");
96                 return NULL;
97         }
98
99         ctx->size = size;
100         ctx->tx_depth = user_parm->tx_depth;
101         /* in case of UD need space for the GRH */
102         if (user_parm->connection_type==UD) {
103                 ctx->buf = malloc(( size + 40 ) * 2); //PORTED ALINGED
104                 if (!ctx->buf) {
105                         fprintf(stderr, "Couldn't allocate work buf.\n");
106                         return NULL;
107                 }
108                 memset(ctx->buf, 0, ( size + 40 ) * 2);
109         } else {
110                 ctx->buf = malloc( size * 2); //PORTED ALINGED
111                 if (!ctx->buf) {
112                         fprintf(stderr, "Couldn't allocate work buf.\n");
113                         return NULL;
114                 }
115                 memset(ctx->buf, 0, size * 2);
116         }
117
118         /*
119          * Open the AL instance
120          */
121         ib_status = ib_open_al(&ctx->al);
122         if(ib_status != IB_SUCCESS)
123         {
124                 fprintf(stderr,"ib_open_al failed status = %d\n", ib_status);
125                 return NULL;
126         }
127
128         /*
129          * Get the Local CA Guids
130          */
131         ib_status = ib_get_ca_guids(ctx->al, NULL, &guid_count);
132         if(ib_status != IB_INSUFFICIENT_MEMORY)
133         {
134                 fprintf(stderr,"ib_get_ca_guids1 failed status = %d\n", (uint32_t)ib_status);
135                 return NULL;
136         }
137         
138         /*
139          * If no CA's Present then return
140          */
141
142         if(guid_count == 0)
143                 return NULL;
144
145         
146         ca_guid_array = (ib_net64_t*)malloc(sizeof(ib_net64_t) * guid_count);
147         
148         ib_status = ib_get_ca_guids(ctx->al, ca_guid_array, &guid_count);
149         if(ib_status != IB_SUCCESS)
150         {
151                 fprintf(stderr,"ib_get_ca_guids2 failed with status = %d\n", ib_status);
152                 return NULL;
153         }
154
155         /*
156          * Open only the first HCA
157          */
158         /* Open the CA */
159         ib_status = ib_open_ca(ctx->al ,ca_guid_array[0] ,NULL,
160                 NULL,   //ca_context
161                 &ctx->ca);
162
163         if(ib_status != IB_SUCCESS)
164         {
165                 fprintf(stderr,"ib_open_ca failed with status = %d\n", ib_status);
166                 return NULL;
167         }
168
169         //xxx
170         //printf("ib_open_ca passed i=%d\n",i); 
171         //xxx
172
173
174         {
175
176                 /* Query the CA */
177                 uint32_t bsize = 0;
178                 ib_status = ib_query_ca(ctx->ca, NULL, &bsize);
179                 if(ib_status != IB_INSUFFICIENT_MEMORY)
180                 {
181                         fprintf(stderr, "Failed to query device props");
182                         return NULL;
183                 }
184
185                 ctx->ca_attr = (ib_ca_attr_t *)malloc(bsize);
186
187                 ib_status = ib_query_ca(ctx->ca, ctx->ca_attr, &bsize);
188                 if(ib_status != IB_SUCCESS)
189                 {
190                         printf("ib_query_ca failed with status = %d\n", ib_status);
191                         return NULL;
192                 }
193                 if (user_parm->mtu == 0) {/*user did not ask for specific mtu */
194                         if (ctx->ca_attr->dev_id == 23108) {
195                                 user_parm->mtu = 1024;
196                         } else {
197                                 user_parm->mtu = 2048;
198                         }
199                 }
200         }
201                 
202         if (user_parm->use_event) {
203 //PORTED                ctx->channel = ibv_create_comp_channel(ctx->context);
204                 ctx->channel = NULL;//remove when PORTED
205                 if (!ctx->channel) {
206                         fprintf(stderr, "Couldn't create completion channel\n");
207                         return NULL;
208                 }
209         } else
210                 ctx->channel = NULL;                  
211
212         ib_status = ib_alloc_pd(ctx->ca ,
213                                                 IB_PDT_NORMAL,
214                                                 ctx, //pd_context
215                                                 &ctx->pd);
216         if (ib_status != IB_SUCCESS) {
217                 fprintf(stderr, "Couldn't allocate PD\n");
218                 return NULL;
219         }
220
221
222         {
223                 ib_mr_create_t                  mr_create;
224                 ib_cq_create_t                  cq_create;
225                 /* We dont really want IBV_ACCESS_LOCAL_WRITE, but IB spec says:
226                  * The Consumer is not allowed to assign Remote Write or Remote Atomic to
227                  * a Memory Region that has not been assigned Local Write. */
228                 if (user_parm->connection_type==UD) {
229                         mr_create.length = (size + 40 ) * 2;
230                 } else {
231                         mr_create.length = size * 2;
232                 }
233                         
234                 mr_create.vaddr = ctx->buf;
235                 mr_create.access_ctrl = IB_AC_RDMA_WRITE| IB_AC_LOCAL_WRITE;
236                 
237                 ib_status = ib_reg_mem(ctx->pd ,&mr_create ,&ctx->lkey ,&ctx->rkey ,&ctx->mr);
238                 if (ib_status != IB_SUCCESS) {
239                         fprintf(stderr, "Couldn't allocate MR\n");
240                         return NULL;
241                 }
242
243                 cq_create.size = user_parm->tx_depth*2;
244                 cq_create.h_wait_obj = NULL;
245                 cq_create.pfn_comp_cb = pp_cq_comp_cb;
246                 ib_status = ib_create_cq(ctx->ca,&cq_create ,ctx, NULL, &ctx->scq);
247                 if (ib_status != IB_SUCCESS) {
248                         fprintf(stderr, "Couldn't create CQ\n");
249                         return NULL;
250                 }
251         }
252         
253         {
254                 ib_qp_create_t  qp_create;
255                 memset(&qp_create, 0, sizeof(ib_qp_create_t));
256                 qp_create.h_sq_cq       = ctx->scq;
257                 qp_create.h_rq_cq       = ctx->scq;
258                 qp_create.sq_depth      = user_parm->tx_depth;
259                 qp_create.rq_depth      = user_parm->tx_depth;
260                 qp_create.sq_sge        = 1;
261                 qp_create.rq_sge        = 1;
262                 //TODO MAX_INLINE
263
264                 switch (user_parm->connection_type) {
265                 case RC :
266                         qp_create.qp_type= IB_QPT_RELIABLE_CONN;
267                         break;
268                 case UC :
269                         qp_create.qp_type = IB_QPT_UNRELIABLE_CONN;
270                         break;
271                 case UD :
272                         qp_create.qp_type = IB_QPT_UNRELIABLE_DGRM;
273                         break;
274                 default:
275                         fprintf(stderr, "Unknown connection type %d \n",user_parm->connection_type);
276                         return NULL;
277                 }
278                 
279                 qp_create.sq_signaled = FALSE;
280                 /*attr.sq_sig_all = 0;*/
281
282                 ib_status = ib_create_qp(ctx->pd, &qp_create,NULL,NULL,&ctx->qp[0]);
283                 if (ib_status != IB_SUCCESS){
284                         fprintf(stderr, "Couldn't create QP\n");
285                         return NULL;
286                 }
287         }
288
289         {
290                 ib_qp_mod_t     qp_modify;
291                 ib_qp_attr_t    qp_attr;
292                 memset(&qp_modify, 0, sizeof(ib_qp_mod_t));
293                 qp_modify.req_state = IB_QPS_INIT;
294                 qp_modify.state.init.pkey_index = 0 ;
295                 qp_modify.state.init.primary_port = (uint8_t)port;
296                 if (user_parm->connection_type==UD) {
297                         qp_modify.state.init.qkey = 0x11111111;
298                 } else {
299                         qp_modify.state.init.access_ctrl = IB_AC_RDMA_WRITE | IB_AC_LOCAL_WRITE;
300                 }
301                 
302                 ib_status = ib_modify_qp(ctx->qp[0], &qp_modify);
303                 if (ib_status != IB_SUCCESS){
304                         fprintf(stderr, "Failed to modify QP to INIT\n");
305                         return NULL;
306                 }
307
308
309                 memset(&qp_attr, 0, sizeof(ib_qp_attr_t));
310                 ib_status = ib_query_qp(ctx->qp[0], &ctx->qp_attr[0]);
311                 if (ib_status != IB_SUCCESS){
312                         fprintf(stderr, "Failed to modify QP to INIT\n");
313                         return NULL;
314                 }
315                 fprintf(stderr, "max inline size %d\n",ctx->qp_attr[0].sq_max_inline);
316                 
317         }
318         return ctx;
319
320 }
321
322 static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
323                           struct pingpong_dest *dest, struct user_parameters *user_parm,int  index)
324 {
325
326         ib_api_status_t ib_status;
327         ib_qp_mod_t     attr;
328         memset(&attr, 0, sizeof(ib_qp_mod_t));
329
330         attr.req_state          = IB_QPS_RTR;
331         switch (user_parm->mtu) {
332         case 256 : 
333                 attr.state.rtr.primary_av.conn.path_mtu = IB_MTU_LEN_256;
334                 break;
335         case 512 :
336                 attr.state.rtr.primary_av.conn.path_mtu = IB_MTU_LEN_512;
337                 break;
338         case 1024 :
339                 attr.state.rtr.primary_av.conn.path_mtu = IB_MTU_LEN_1024;
340                 break;
341         case 2048 :
342                 attr.state.rtr.primary_av.conn.path_mtu = IB_MTU_LEN_2048;
343                 break;
344         }
345         printf("Mtu : %d\n", user_parm->mtu);
346         attr.state.rtr.dest_qp  = (dest->qpn);
347         attr.state.rtr.rq_psn   = (dest->psn);
348         if (user_parm->connection_type==RC) {
349                 attr.state.rtr.resp_res = 1;
350                 attr.state.rtr.rnr_nak_timeout = 12;
351         }
352         attr.state.rtr.primary_av.grh_valid = 0;
353         attr.state.rtr.primary_av.dlid = dest->lid;
354         attr.state.rtr.primary_av.sl = 0;
355         attr.state.rtr.primary_av.path_bits = 0;
356         attr.state.rtr.primary_av.port_num = (uint8_t)port;
357         attr.state.rtr.primary_av.static_rate = IB_PATH_RECORD_RATE_10_GBS;
358         attr.state.rtr.opts = IB_MOD_QP_LOCAL_ACK_TIMEOUT |
359                                         IB_MOD_QP_RESP_RES |
360                                         IB_MOD_QP_PRIMARY_AV;
361
362
363         ib_status = ib_modify_qp(ctx->qp[0], &attr);
364         if(ib_status != IB_SUCCESS){
365                         fprintf(stderr, "Failed to modify UC QP to RTR\n");
366                         return 1;
367         }
368
369         if (user_parm->connection_type == UD) {
370                 ib_av_attr_t    av_attr;
371
372                 av_attr.grh_valid = 0;
373                 av_attr.dlid = dest->lid;
374                 av_attr.sl = 0;
375                 av_attr.path_bits = 0;
376                 av_attr.port_num = (uint8_t)port;
377                 av_attr.static_rate = IB_PATH_RECORD_RATE_10_GBS;
378
379                 ib_status = ib_create_av(ctx->pd,&av_attr, &ctx->av);
380                 if (ib_status != IB_SUCCESS) {
381                         fprintf(stderr, "Failed to create AH for UD\n");
382                         return 1;
383                 }
384         }
385
386
387         memset(&attr, 0, sizeof(ib_qp_mod_t));
388         attr.req_state  = IB_QPS_RTS;
389         attr.state.rts.sq_psn = my_psn;
390
391         if (user_parm->connection_type == RC) {
392                 attr.state.rts.resp_res = 1;
393                 attr.state.rts.local_ack_timeout = 14;
394                 attr.state.rts.retry_cnt = 7;
395                 attr.state.rts.rnr_retry_cnt = 7;
396                 attr.state.rts.opts = IB_MOD_QP_RNR_RETRY_CNT |
397                                                 IB_MOD_QP_RETRY_CNT |
398                                                 IB_MOD_QP_LOCAL_ACK_TIMEOUT;
399                                                 
400         }       
401         ib_status = ib_modify_qp(ctx->qp[index], &attr);
402         if(ib_status != IB_SUCCESS){
403                 fprintf(stderr, "Failed to modify UC QP to RTS\n");
404                 return 1;
405         }
406
407         
408         
409         /* post recieve max msg size*/
410         {
411                 int i;
412                 ib_recv_wr_t      *bad_wr_recv;
413                 //recieve
414                 ctx->rwr.wr_id      = PINGPONG_RECV_WRID;
415                 ctx->rwr.ds_array = &ctx->recv_list;
416                 ctx->rwr.num_ds = 1;
417                 ctx->rwr.p_next = NULL;
418                 ctx->recv_list.vaddr = (uintptr_t) ctx->buf;
419                 if (user_parm->connection_type==UD) {
420                         ctx->recv_list.length = ctx->size + 40;
421                 } else {
422                         ctx->recv_list.length = ctx->size;
423                 }
424                 ctx->recv_list.lkey = ctx->lkey;
425                 for (i = 0; i < user_parm->tx_depth; ++i) {
426                         ib_status = ib_post_recv(ctx->qp[index], &ctx->rwr, &bad_wr_recv);
427                         if (ib_status != IB_SUCCESS)
428                         {
429                                 fprintf(stderr, "Couldn't post recv: counter=%d\n", i);
430                                 return 14;
431                         }
432                         PERF_DEBUG("rcnt = %d \n",i);
433                 }   
434         }
435         post_recv = user_parm->tx_depth;
436
437         return 0;
438 }
439
440 static SOCKET pp_open_port(struct pingpong_context *ctx, const char * servername,
441                         int ib_port, int port, struct pingpong_dest **p_my_dest,
442                         struct pingpong_dest **p_rem_dest,struct user_parameters *user_parm)
443 {
444         struct pingpong_dest    *my_dest;
445         struct pingpong_dest    *rem_dest;
446         SOCKET                          sockfd;
447         int                                     rc;
448         int                                     i;
449         int                                     numofqps = 1;
450         
451         /* Create connection between client and server.
452          * We do it by exchanging data over a TCP socket connection. */
453
454         
455         my_dest = malloc( sizeof (struct pingpong_dest) * numofqps);
456         if (!my_dest){
457                 perror("malloc");
458                 return INVALID_SOCKET;
459         }
460
461         rem_dest = malloc(sizeof (struct pingpong_dest) * numofqps );
462         if (!rem_dest){
463                 perror("malloc");
464                 return INVALID_SOCKET;
465         }
466
467         sockfd = servername ? pp_client_connect(servername, port) :
468                 pp_server_connect(port);
469
470         if (sockfd  == INVALID_SOCKET) {
471                 printf("pp_connect_sock(%s,%d) failed (%d)!\n",
472                        servername, port, sockfd);
473                 return INVALID_SOCKET;
474         }
475
476         
477         for (i =0 ;i<numofqps;i ++) 
478         {
479                 /* Create connection between client and server.
480                 * We do it by exchanging data over a TCP socket connection. */
481                 
482                 my_dest[i].lid = ctx->ca_attr->p_port_attr[ib_port-1].lid;
483                 my_dest[i].psn = rand() & 0xffffff;
484                 if (!my_dest[i].lid) {
485                         fprintf(stderr, "Local lid 0x0 detected. Is an SM running?\n");
486                         return 1;
487                 }
488                 my_dest[i].qpn = ctx->qp_attr[i].num;
489                 /* TBD this should be changed inot VA and different key to each qp */
490                 my_dest[i].rkey = ctx->rkey;
491                 my_dest[i].vaddr = (uintptr_t)ctx->buf + ctx->size;
492
493                 printf("  local address:  LID %#04x, QPN %#06x, PSN %#06x, "
494                 "RKey %#08x VAddr %#016Lx\n",
495                 my_dest[i].lid, my_dest[i].qpn, my_dest[i].psn,
496                 my_dest[i].rkey, my_dest[i].vaddr);
497
498                 rc = servername ? pp_client_exch_dest(sockfd, &my_dest[i],&rem_dest[i]):
499                                                 pp_server_exch_dest(sockfd, &my_dest[i],&rem_dest[i]);
500                 if (rc)
501                         return INVALID_SOCKET;
502                 printf("  remote address: LID %#04x, QPN %#06x, PSN %#06x, "
503                 "RKey %#08x VAddr %#016Lx\n",
504                 rem_dest[i].lid, rem_dest[i].qpn, rem_dest[i].psn,
505                 rem_dest[i].rkey, rem_dest[i].vaddr);
506
507                 if (pp_connect_ctx(ctx, ib_port, my_dest[i].psn, &rem_dest[i], user_parm, i))
508                         return INVALID_SOCKET;
509                 /* An additional handshake is required *after* moving qp to RTR.
510                 Arbitrarily reuse exch_dest for this purpose. */
511                 rc = servername ? pp_client_exch_dest(sockfd, &my_dest[i],&rem_dest[i]):
512                                                 pp_server_exch_dest(sockfd, &my_dest[i],&rem_dest[i]);
513                 if (rc)
514                         return INVALID_SOCKET;
515         }
516         *p_rem_dest = rem_dest;
517         *p_my_dest = my_dest;
518         return sockfd;
519 }
520
521
/*
 * usage - print the command-line help text to stdout.
 *
 * argv0  program name shown in the two invocation examples.
 */
static void usage(const char *argv0)
{
	/* One entry per option line, printed verbatim in this order. */
	static const char *const option_help[] = {
		"  -p, --port=<port>         listen on/connect to port <port> (default 18515)\n",
		"  -d, --ib-dev=<dev>        use IB device <dev> (default first device found)\n",
		"  -i, --ib-port=<port>      use port <port> of IB device (default 1)\n",
		"  -c, --connection=<RC/UC>  connection type RC/UC/UD (default RC)\n",
		"  -m, --mtu=<mtu>           mtu size (default 1024)\n",
		"  -s, --size=<size>         size of message to exchange (default 65536)\n",
		"  -a, --all                 Run sizes from 2 till 2^23\n",
		"  -t, --tx-depth=<dep>      size of tx queue (default 300)\n",
		"  -n, --iters=<iters>       number of exchanges (at least 2, default 1000)\n",
		"  -b, --bidirectional       measure bidirectional bandwidth (default unidirectional)\n",
		"  -V, --version             display version number\n",
		"  -e, --events              sleep on CQ events (default poll)\n",
	};
	size_t k;

	fputs("Usage:\n", stdout);
	printf("  %s            start a server and wait for connection\n", argv0);
	printf("  %s <host>     connect to server at <host>\n", argv0);
	fputs("\nOptions:\n", stdout);

	for (k = 0; k < sizeof option_help / sizeof option_help[0]; ++k)
		fputs(option_help[k], stdout);
}
542
543 static void print_report(unsigned int iters, unsigned size, int duplex,
544                          cycles_t *tposted, cycles_t *tcompleted)
545 {
546         double                  cycles_to_units;
547         uint64_t                        tsize;  /* Transferred size, in megabytes */
548         int                             i, j;
549         int                             opt_posted = 0, opt_completed = 0;
550         cycles_t                        opt_delta;
551         cycles_t                        t;
552
553
554         opt_delta = tcompleted[opt_posted] - tposted[opt_completed];
555
556         /* Find the peak bandwidth */
557         for (i = 0; i < (int)iters; ++i)
558                 for (j = i; j < (int)iters; ++j) {
559                         t = (tcompleted[j] - tposted[i]) / (j - i + 1);
560                         if (t < opt_delta) {
561                                 opt_delta  = t;
562                                 opt_posted = i;
563                                 opt_completed = j;
564                         }
565                 }
566
567         cycles_to_units = get_cpu_mhz();
568
569         tsize = duplex ? 2 : 1;
570         tsize = tsize * size;
571         printf("%7d        %d            %7.2f               %7.2f \n",
572                size,iters,tsize * cycles_to_units / opt_delta / 0x100000,
573                 (uint64_t)tsize * iters * cycles_to_units /(tcompleted[iters - 1] - tposted[0]) / 0x100000);
574 }
575
576
577 int run_iter_bi(struct pingpong_context *ctx, struct user_parameters *user_param,
578                 struct pingpong_dest *rem_dest, int size)
579 {
580
581         ib_qp_handle_t  qp;
582         int                             scnt, ccnt, rcnt;
583         ib_recv_wr_t            *bad_wr_recv;
584         ib_api_status_t ib_status;
585
586         if (user_param->connection_type==UD) {
587                 if (size > 2048) {
588                         size = 2048;
589                 }
590         }
591         /*********************************************
592          * Important note :
593          * In case of UD/UC this is NOT the way to measure
594          * BW sicen we are running with loop on the send side
595          * while we should run on the recieve side or enable retry in SW
596          * Since the sender may be faster than the reciver than although
597          * we had posted recieve it is not enough and might end this will
598          * result in deadlock of test since both sides are stuck on poll cq
599          * In this test i do not solve this for the general test ,need to write
600          * seperate test for UC/UD but in case the tx_depth is ~1/3 from the
601          * number of iterations this should be ok .
602          * Also note that the sender is limited in the number of send, ans
603          * i try to make the reciver full 
604          *********************************************/
605         /* send */
606         if (user_param->connection_type==UD) {
607                 ctx->list.vaddr = (uintptr_t) ctx->buf + 40;
608                 ctx->wr.dgrm.ud.h_av = ctx->av;
609                 ctx->wr.dgrm.ud.remote_qp  = rem_dest->qpn;
610                 ctx->wr.dgrm.ud.remote_qkey = 0x11111111;
611         } else {
612                 ctx->list.vaddr = (uintptr_t) ctx->buf;
613         }
614         ctx->list.lkey = ctx->lkey;
615         ctx->wr.wr_id = PINGPONG_SEND_WRID;
616         ctx->wr.ds_array = &ctx->list;
617         ctx->wr.num_ds = 1;
618         ctx->wr.wr_type = WR_SEND;
619         ctx->wr.p_next       = NULL;
620         
621         if ((uint32_t)size > ctx->qp_attr[0].sq_max_inline) { /*complaince to perf_main */
622                 ctx->wr.send_opt = IB_SEND_OPT_SIGNALED;
623         } else {
624                 ctx->wr.send_opt = IB_SEND_OPT_SIGNALED | IB_SEND_OPT_INLINE;
625         }
626         
627         ctx->list.length = size;
628         scnt = 0;
629         ccnt = 0;
630         rcnt = 0;
631         qp = ctx->qp[0];
632
633         while (ccnt < user_param->iters || rcnt < user_param->iters ) {
634                 while (scnt < user_param->iters && (scnt - ccnt) < user_param->tx_depth / 2) {
635                         ib_send_wr_t    *bad_wr;
636                         tposted[scnt] = get_cycles();
637                         ib_status = ib_post_send(qp, &ctx->wr, &bad_wr);
638                         if (ib_status != IB_SUCCESS) {
639                                 fprintf(stderr, "Couldn't post send: scnt=%d ib_status %d\n",
640                                         scnt,ib_status);
641                                 return 1;
642                         }
643                         ++scnt;
644                         PERF_DEBUG("scnt = %d \n",scnt);
645                 }
646
647                 {
648                         ib_wc_t wc;
649                         ib_wc_t *p_wc_done,*p_wc_free;
650
651
652                         p_wc_free = &wc;
653                         p_wc_free->p_next = NULL;
654                         p_wc_done = NULL;
655 #if PORTED
656                         if (user_param->use_event) {
657                                 struct ibv_cq *ev_cq;
658                                 void          *ev_ctx;
659                                 if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {
660                                         fprintf(stderr, "Failed to get cq_event\n");
661                                         return 1;
662                                 }                
663                                 if (ev_cq != ctx->cq) {
664                                         fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
665                                         return 1;
666                                 }
667                                 if (ibv_req_notify_cq(ctx->cq, 0)) {
668                                         fprintf(stderr, "Couldn't request CQ notification\n");
669                                         return 1;
670                                 }
671                         }
672 #endif
673                         do {
674                                 ib_status = ib_poll_cq(ctx->scq, &p_wc_free, &p_wc_done);
675                                 if (ib_status == IB_SUCCESS ) {
676                                         if (p_wc_done->status != IB_WCS_SUCCESS) {
677                                                 fprintf(stderr, "Completion wth error at %s:\n",
678                                                 user_param->servername ? "client" : "server");
679                                                 fprintf(stderr, "Failed status %d: wr_id %d syndrom 0x%x\n",
680                                                 p_wc_done->status, (int) p_wc_done->wr_id, p_wc_done->vendor_specific);
681                                                 fprintf(stderr, "scnt=%d, ccnt=%d\n",
682                                                 scnt, ccnt);
683                                                 return 1;
684                                         }
685                                         switch ((int) p_wc_done->wr_id) {
686                                         case PINGPONG_SEND_WRID:
687                                                 tcompleted[ccnt] = get_cycles();
688                                                 ++ccnt;
689                                                 break;
690                                         case PINGPONG_RECV_WRID:
691                                                 if (--post_recv <= user_param->tx_depth - 2) {
692                                                         while (rcnt < user_param->iters && (user_param->tx_depth - post_recv) > 0 ) {
693                                                                 post_recv++;
694                                                                 ib_status = ib_post_recv(ctx->qp[0], &ctx->rwr, &bad_wr_recv);
695                                                                 if (ib_status != IB_SUCCESS)
696                                                                 {
697                                                                         fprintf(stderr, "Couldn't post recv: rcnt=%d\n",
698                                                                         rcnt);
699                                                                         return 15;
700                                                                 }
701                                                         }
702                                                 }
703                                                 ++rcnt;
704                                                 break;
705                                         default:
706                                                 fprintf(stderr, "Completion for unknown wr_id %d\n",
707                                                         (int) wc.wr_id);
708                                                 break;
709                                         }
710                                         p_wc_free = p_wc_done;
711                                         p_wc_free->p_next = NULL;
712                                         p_wc_done = NULL;
713                                         PERF_DEBUG("ccnt = %d \n",ccnt);
714                                         PERF_DEBUG("rcnt = %d \n",rcnt);
715                                 }
716                         } while (ib_status == IB_SUCCESS );
717
718                         if (ib_status != IB_NOT_FOUND) {
719                                 fprintf(stderr, "poll CQ failed %d\n", ib_status);
720                                 return 1;
721                         }
722
723                 }
724         }
725         return(0);
726 }
727
728
729 int run_iter_uni(struct pingpong_context *ctx, struct user_parameters *user_param,
730                  struct pingpong_dest *rem_dest, int size)
731 {
732
733         ib_qp_handle_t  qp;
734         int                             scnt, ccnt, rcnt;
735         ib_recv_wr_t            *bad_wr_recv;
736         ib_api_status_t ib_status;
737
738
739         /* send */
740         if (user_param->connection_type==UD) {
741                 ctx->list.vaddr = (uintptr_t) ctx->buf + 40;
742                 ctx->wr.dgrm.ud.h_av = ctx->av;
743                 ctx->wr.dgrm.ud.remote_qp  = rem_dest->qpn;
744                 ctx->wr.dgrm.ud.remote_qkey = 0x11111111;
745         } else {
746                 ctx->list.vaddr = (uintptr_t) ctx->buf;
747         }
748         ctx->list.lkey = ctx->lkey;
749         ctx->wr.wr_id = PINGPONG_SEND_WRID;
750         ctx->wr.ds_array = &ctx->list;
751         ctx->wr.num_ds = 1;
752         ctx->wr.wr_type = WR_SEND;
753         ctx->wr.p_next       = NULL;
754
755         
756         if ((uint32_t)size > ctx->qp_attr[0].sq_max_inline) { /*complaince to perf_main */
757                 ctx->wr.send_opt = IB_SEND_OPT_SIGNALED;
758         } else {
759                 ctx->wr.send_opt = IB_SEND_OPT_SIGNALED | IB_SEND_OPT_INLINE;
760         }
761         ctx->list.length = size;
762         
763         scnt = 0;
764         ccnt = 0;
765         rcnt = 0;
766         qp = ctx->qp[0];
767         if (!user_param->servername) {
768                 while (rcnt < user_param->iters) {
769                         ib_wc_t wc;
770                         ib_wc_t *p_wc_done,*p_wc_free;
771
772                         p_wc_free = &wc;
773                         p_wc_done = NULL;
774                         p_wc_free->p_next = NULL;
775                         
776                         /*Server is polling on recieve first */
777 #if PORTED
778                         if (user_param->use_event) {
779                                 struct ibv_cq *ev_cq;
780                                 void          *ev_ctx;
781                                 if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {
782                                         fprintf(stderr, "Failed to get cq_event\n");
783                                         return 1;
784                                 }                
785                                 if (ev_cq != ctx->cq) {
786                                         fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
787                                         return 1;
788                                 }
789                                 if (ibv_req_notify_cq(ctx->cq, 0)) {
790                                 fprintf(stderr, "Couldn't request CQ notification\n");
791                                 return 1;
792                                 }
793                         }
794 #endif
795                         do {
796                                 ib_status = ib_poll_cq(ctx->scq, &p_wc_free, &p_wc_done);
797                                 if (ib_status == IB_SUCCESS) {
798                                         tcompleted[ccnt] = get_cycles();
799                                         if (p_wc_done->status != IB_WCS_SUCCESS) {
800                                                 fprintf(stderr, "Completion wth error at %s:\n",
801                                                 user_param->servername ? "client" : "server");
802                                                 fprintf(stderr, "Failed status %d: wr_id %d syndrom 0x%x\n",
803                                                 p_wc_done->status, (int) p_wc_done->wr_id, p_wc_done->vendor_specific);
804                                                 fprintf(stderr, "scnt=%d, ccnt=%d\n",
805                                                 scnt, ccnt);
806                                                 return 1;
807                                         }
808                                         
809                                         ib_status = ib_post_recv(ctx->qp[0], &ctx->rwr, &bad_wr_recv);
810                                         if (ib_status != IB_SUCCESS)
811                                         {
812                                                 fprintf(stderr, "Couldn't post recv: rcnt=%d\n",
813                                                 rcnt);
814                                                 return 15;
815                                         }
816                                         ++rcnt;
817                                         ++ccnt;
818                                         PERF_DEBUG("ccnt = %d \n",ccnt);
819                                         PERF_DEBUG("rcnt = %d \n",rcnt);
820
821                                         p_wc_free = p_wc_done;
822                                         p_wc_free->p_next = NULL;
823                                         p_wc_done = NULL;
824                                 }
825                                 
826                                 
827                         } while (ib_status == IB_SUCCESS);
828                         if (ib_status != IB_NOT_FOUND) {
829                                 fprintf(stderr, "Poll Recieve CQ failed %d\n", ib_status);
830                                 return 12;
831                         }
832
833                 }
834         } else {
835                 /* client is posting and not receiving. */
836                 while (scnt < user_param->iters || ccnt < user_param->iters) {
837                         while (scnt < user_param->iters && (scnt - ccnt) < user_param->tx_depth ) {
838                                 ib_send_wr_t    *bad_wr;
839                                 tposted[scnt] = get_cycles();
840                                 ib_status = ib_post_send(qp, &ctx->wr, &bad_wr);
841                                 if (ib_status != IB_SUCCESS) {
842                                         fprintf(stderr, "Couldn't post send: scnt=%d ib_status %d\n",
843                                                 scnt,ib_status);
844                                         return 1;
845                                 }
846                                 ++scnt;
847                                 PERF_DEBUG("scnt = %d \n",scnt);
848                         }
849                         if (ccnt < user_param->iters) {
850                                 ib_wc_t wc;
851                                 ib_wc_t *p_wc_done,*p_wc_free;
852
853
854                                 p_wc_free = &wc;
855                                 p_wc_free->p_next = NULL;
856                                 p_wc_done = NULL;
857                                 
858 #if PORTED
859                                 if (user_param->use_event) {
860                                         struct ibv_cq *ev_cq;
861                                         void          *ev_ctx;
862                                         if (ibv_get_cq_event(ctx->channel, &ev_cq, &ev_ctx)) {
863                                                 fprintf(stderr, "Failed to get cq_event\n");
864                                                 return 1;
865                                         }                
866                                         if (ev_cq != ctx->cq) {
867                                                 fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq);
868                                                 return 1;
869                                         }
870                                         if (ibv_req_notify_cq(ctx->cq, 0)) {
871                                                 fprintf(stderr, "Couldn't request CQ notification\n");
872                                                 return 1;
873                                         }
874                                 } 
875 #endif
876                                 do {
877                                         ib_status = ib_poll_cq(ctx->scq, &p_wc_free, &p_wc_done);
878                                         if (ib_status == IB_SUCCESS ) {
879                                                 tcompleted[ccnt] = get_cycles();
880                                                 if (p_wc_done->status != IB_WCS_SUCCESS) {
881                                                         fprintf(stderr, "Completion wth error at %s:\n",
882                                                         user_param->servername ? "client" : "server");
883                                                         fprintf(stderr, "Failed status %d: wr_id %d syndrom 0x%x\n",
884                                                         p_wc_done->status, (int) p_wc_done->wr_id, p_wc_done->vendor_specific);
885                                                         fprintf(stderr, "scnt=%d, ccnt=%d\n",
886                                                         scnt, ccnt);
887                                                         return 1;
888                                                 }
889                                                 ccnt ++;
890                                                 p_wc_free = p_wc_done;
891                                                 p_wc_free->p_next = NULL;
892                                                 p_wc_done = NULL;
893                                         }
894
895                                 } while (ib_status == IB_SUCCESS );
896                                 if (ib_status != IB_NOT_FOUND) {
897                                         fprintf(stderr, "poll CQ failed %d\n", ib_status);
898                                         return 1;
899                                 }
900
901                                 PERF_DEBUG("ccnt = %d \n",ccnt);
902                         }
903                 }
904         }
905         return(0);
906 }
907
908
909 int __cdecl main(int argc, char *argv[])
910 {
911         struct pingpong_context *ctx;
912         struct pingpong_dest     *my_dest;
913         struct pingpong_dest    *rem_dest;
914         struct user_parameters  user_param;
915         char                            *ib_devname = NULL;
916         int                             port = 18515;
917         int                             ib_port = 1;
918         unsigned                        size = 65536;
919         SOCKET                  sockfd = INVALID_SOCKET;
920         int                             i = 0;
921         int                             size_max_pow = 24;
922         WSADATA         wsaData;
923         int                             iResult;
924
925         
926
927         /* init default values to user's parameters */
928         memset(&user_param, 0, sizeof(struct user_parameters));
929         user_param.mtu = 0;
930         user_param.iters = 1000;
931         user_param.tx_depth = 300;
932         user_param.servername = NULL;
933         user_param.use_event = 0;
934         user_param.duplex = 0;
935         /* Parameter parsing. */
936         while (1) {
937                 int c;
938
939                 static struct option long_options[] = {
940                         { "port",                       1,      NULL,   'p' },
941                         { "ib-dev",             1,      NULL,   'd' },
942                         { "ib-port",            1,      NULL,   'i' },
943                         { "mtu",                        1,      NULL,   'm' },
944                         { "connection", 1,      NULL,   'c' },
945                         { "size",                       1,      NULL,   's' },
946                         { "iters",                      1,      NULL,   'n' },
947                         { "tx-depth",           1,      NULL,   't' },
948                         { "all",                        0,      NULL,   'a' },
949                         { "bidirectional",      0,      NULL,   'b' },
950                         { "version",            0,      NULL,   'V' },
951                         { "events",             0,      NULL,   'e' },
952                         { 0 }
953                 };
954
955                 c = getopt_long(argc, argv, "p:d:i:m:c:s:n:t:ebaV", long_options, NULL);
956                 if (c == -1)
957                         break;
958
959                 switch (c) {
960                 case 'p':
961                         port = strtol(optarg, NULL, 0);
962                         if (port < 0 || port > 65535) {
963                                 usage(argv[0]);
964                                 return 1;
965                         }
966                         break;
967                 case 'e':
968                         ++user_param.use_event;
969                         break;
970                 case 'd':
971                         ib_devname = _strdup(optarg);
972                         break;
973                 case 'c':
974                         if (strcmp("UC",optarg)==0)
975                                 user_param.connection_type=UC;
976                         if (strcmp("UD",optarg)==0)
977                                 user_param.connection_type=UD;
978                         break;
979
980                 case 'm':
981                         user_param.mtu = strtol(optarg, NULL, 0);
982                         break;
983                 case 'a':
984                         user_param.all = ALL;
985                         break;
986                 case 'V':
987                         printf("send_bw version : %.2f\n",VERSION);
988                         return 0;
989                         break;
990                 case 'i':
991                         ib_port = strtol(optarg, NULL, 0);
992                         if (ib_port < 0) {
993                                 usage(argv[0]);
994                                 return 1;
995                         }
996                         break;
997
998                 case 's':
999                         size = (unsigned)_strtoui64(optarg, NULL, 0);
1000                         if (size < 1 || size > UINT_MAX / 2) {
1001                                 usage(argv[0]);
1002                                 return 1;
1003                         }
1004
1005                         break;
1006
1007                 case 't':
1008                         user_param.tx_depth = strtol(optarg, NULL, 0);
1009                         if (user_param.tx_depth < 1) { usage(argv[0]); return 1; }
1010                         break;
1011
1012                 case 'n':
1013                         user_param.iters = strtol(optarg, NULL, 0);
1014                         if (user_param.iters < 2) {
1015                                 usage(argv[0]);
1016                                 return 1;
1017                         }
1018
1019                         break;
1020
1021                 case 'b':
1022                         user_param.duplex = 1;
1023                         break;
1024                 case 'h':
1025                 default:
1026                         usage(argv[0]);
1027                         return 1;
1028                 }
1029         }
1030
1031         if (optind == argc - 1)
1032                 user_param.servername = _strdup(argv[optind]);
1033         else if (optind < argc) {
1034                 usage(argv[0]);
1035                 return 1;
1036         }
1037         printf("------------------------------------------------------------------\n");
1038         if (user_param.duplex == 1) 
1039                 printf("                    Send Bidirectional BW Test\n");
1040         else 
1041                 printf("                    Send BW Test\n");
1042
1043         printf("Inline data is used up to 400 bytes message\n");
1044         if (user_param.connection_type==RC) {
1045                 printf("Connection type : RC\n");
1046         } else if (user_param.connection_type==UC) { 
1047                 printf("Connection type : UC\n");
1048         } else {
1049                 printf("Connection type : UD\n");
1050         }
1051
1052         /* Done with parameter parsing. Perform setup. */
1053
1054         // Initialize Winsock
1055         iResult = WSAStartup(MAKEWORD(2,2), &wsaData);
1056         if (iResult != NO_ERROR) {
1057                 printf("Error at WSAStartup()\n");
1058                 return 1;
1059         }
1060         
1061         if (user_param.all == ALL && user_param.connection_type!=UD) {
1062                 /*since we run all sizes */
1063                 printf("test\n");
1064                 size = 8388608; /*2^23 */
1065         } else if (user_param.connection_type==UD ) {
1066                 printf("Max msg size in UD is 2048 changing to 2048\n");
1067                 size = 2048;
1068         }
1069         
1070         
1071         srand(GetCurrentProcessId() * GetTickCount());
1072
1073         //TODO: get pagesize from sysinfo
1074         page_size = 4096;
1075
1076         //TODO:get the device names
1077         
1078
1079         // init the context
1080         ctx = pp_init_ctx(size, ib_port, &user_param);
1081         if (!ctx)
1082                 return 1;
1083         
1084         sockfd = pp_open_port(ctx, user_param.servername, ib_port, port,&my_dest,&rem_dest,&user_param);
1085         if (sockfd == INVALID_SOCKET)
1086                 return 9;
1087         
1088 #if PORTED
1089         if (user_param.use_event) {
1090                 printf("Test with events.\n");
1091                 if (ibv_req_notify_cq(ctx->cq, 0)) {
1092                         fprintf(stderr, "Couldn't request CQ notification\n");
1093                         return 1;
1094                 } 
1095         }
1096 #endif
1097         printf("------------------------------------------------------------------\n");
1098         printf(" #bytes #iterations    BW peak[MB/sec]    BW average[MB/sec]  \n");
1099
1100         tposted = malloc(user_param.iters * sizeof *tposted);
1101
1102         if (!tposted) {
1103                 perror("malloc");
1104                 return 1;
1105         }
1106
1107         tcompleted = malloc(user_param.iters * sizeof *tcompleted);
1108
1109         if (!tcompleted) {
1110                 perror("malloc");
1111                 return 1;
1112         }
1113         
1114
1115         if (user_param.all == ALL) {
1116                 if (user_param.connection_type==UD) {
1117                         size_max_pow = 12;
1118                 }
1119                 for (i = 1; i < size_max_pow ; ++i) {
1120                         size = 1 << i;
1121                         if (user_param.duplex) {
1122                                 if(run_iter_bi(ctx, &user_param, rem_dest, size))
1123                                         return 17;
1124                         } else {
1125                                 if(run_iter_uni(ctx, &user_param, rem_dest, size))
1126                                         return 17;
1127                         }
1128                         if (user_param.servername) {
1129                                 print_report(user_param.iters, size, user_param.duplex, tposted, tcompleted);
1130                                 /* sync again for the sake of UC/UC */
1131                                 if(pp_client_exch_dest(sockfd, my_dest,rem_dest))
1132                                         return 19;
1133                                         
1134                         } else {
1135                                 if(pp_server_exch_dest(sockfd,my_dest,rem_dest))
1136                                         return 19;
1137                                         
1138                         }
1139
1140                 }
1141         } else {
1142                 if (user_param.duplex) {
1143                         if(run_iter_bi(ctx, &user_param,rem_dest, size))
1144                                 return 18;
1145                 } else {
1146                         if(run_iter_uni(ctx, &user_param,rem_dest, size))
1147                                 return 18;
1148                 }
1149                 if (user_param.servername) {
1150                         print_report(user_param.iters, size, user_param.duplex, tposted, tcompleted);
1151                 }
1152         }
1153
1154         /* close sockets */
1155         if (user_param.servername) {
1156                 pp_client_exch_dest(sockfd, my_dest,rem_dest);
1157         } else {
1158                 pp_server_exch_dest(sockfd, my_dest,rem_dest);
1159         }
1160
1161         send(sockfd, "done", sizeof "done",0);
1162         closesocket(sockfd);
1163
1164         free(tposted);
1165         free(tcompleted);
1166
1167         printf("------------------------------------------------------------------\n");
1168         goto end;
1169
1170
1171 end:    
1172         WSACleanup();
1173         return 0;
1174 }