[TOOLS] make -h option work by adding h to getopt() options string.
[mirror/winof/.git] / tools / perftests / user / send_bw / send_bw.c
1 /*
2  * Copyright (c) 2005 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
4  * Portions Copyright (c) 2008 Microsoft Corporation.  All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  *
34  * $Id$
35  */
36
37 #include "getopt.h"
38 #include "get_clock.h"
39
40 #include "perf_defs.h"
41
42 #define SIGNAL 1
43 #define MAX_INLINE 400
44
/*
 * Options that drive one run of the send bandwidth test.
 */
struct user_parameters {
	/* Peer host name; NULL selects the server role. */
	const char *servername;
	/* Transport to test; compared against RC, UC and UD. */
	int connection_type;
	/* Path MTU in bytes; 0 lets pp_init_ctx() choose 1024 or 2048 by device id. */
	int mtu;
	/* run all msg size */
	int all;
	/* Not referenced by the code visible in this part of the file. */
	int signal_comp;
	/* Number of sends (and receives) each measurement loop waits for. */
	int iters;
	/* Depth used for the send and receive queues; the CQ gets twice this. */
	int tx_depth;
	/* presumably non-zero for a bidirectional measurement -- confirm against main() */
	int duplex;
	/* Non-zero: block on the CQ wait object instead of busy polling. */
	int use_event;
};
56
57 static int page_size;
58 cycles_t        *tposted;
59 cycles_t        *tcompleted;
60 int post_recv;
61
62
63 void
64 pp_cq_comp_cb(
65         IN              const   ib_cq_handle_t                          h_cq,
66         IN                              void                                            *cq_context )
67 {
68         UNUSED_PARAM( h_cq );
69         UNUSED_PARAM( cq_context);
70         return ;
71 }
72
73
74
75 static struct pingpong_context *pp_init_ctx(unsigned size,int port, 
76                                         struct user_parameters *user_parm)
77 {
78
79         struct pingpong_context *ctx;
80         ib_api_status_t                         ib_status = IB_SUCCESS; 
81         size_t                                  guid_count;
82         ib_net64_t                              *ca_guid_array;
83
84
85         
86         ctx = malloc(sizeof *ctx);
87         if (!ctx)
88                 return NULL;
89
90         ctx->qp = malloc(sizeof (ib_qp_handle_t));
91         if (!ctx->qp) {
92                 perror("malloc");
93                 return NULL;
94         }
95         ctx->qp_attr = malloc(sizeof (ib_qp_attr_t));
96         if (!ctx->qp_attr) {
97                 perror("malloc");
98                 return NULL;
99         }
100
101         ctx->size = size;
102         ctx->tx_depth = user_parm->tx_depth;
103         /* in case of UD need space for the GRH */
104         if (user_parm->connection_type==UD) {
105                 ctx->buf = malloc(( size + 40 ) * 2);
106                 if (!ctx->buf) {
107                         fprintf(stderr, "Couldn't allocate work buf.\n");
108                         return NULL;
109                 }
110                 memset(ctx->buf, 0, ( size + 40 ) * 2);
111         } else {
112                 ctx->buf = malloc( size * 2);
113                 if (!ctx->buf) {
114                         fprintf(stderr, "Couldn't allocate work buf.\n");
115                         return NULL;
116                 }
117                 memset(ctx->buf, 0, size * 2);
118         }
119
120         /*
121          * Open the AL instance
122          */
123         ib_status = ib_open_al(&ctx->al);
124         if(ib_status != IB_SUCCESS)
125         {
126                 fprintf(stderr,"ib_open_al failed status = %d\n", ib_status);
127                 return NULL;
128         }
129
130         /*
131          * Get the Local CA Guids
132          */
133         ib_status = ib_get_ca_guids(ctx->al, NULL, &guid_count);
134         if(ib_status != IB_INSUFFICIENT_MEMORY)
135         {
136                 fprintf(stderr,"ib_get_ca_guids1 failed status = %d\n", (uint32_t)ib_status);
137                 return NULL;
138         }
139         
140         /*
141          * If no CA's Present then return
142          */
143
144         if(guid_count == 0)
145                 return NULL;
146
147         
148         ca_guid_array = (ib_net64_t*)malloc(sizeof(ib_net64_t) * guid_count);
149         
150         ib_status = ib_get_ca_guids(ctx->al, ca_guid_array, &guid_count);
151         if(ib_status != IB_SUCCESS)
152         {
153                 fprintf(stderr,"ib_get_ca_guids2 failed with status = %d\n", ib_status);
154                 return NULL;
155         }
156
157         /*
158          * Open only the first HCA
159          */
160         /* Open the CA */
161         ib_status = ib_open_ca(ctx->al ,ca_guid_array[0] ,NULL,
162                 NULL,   //ca_context
163                 &ctx->ca);
164
165         if(ib_status != IB_SUCCESS)
166         {
167                 fprintf(stderr,"ib_open_ca failed with status = %d\n", ib_status);
168                 return NULL;
169         }
170
171         //xxx
172         //printf("ib_open_ca passed i=%d\n",i); 
173         //xxx
174
175
176         {
177
178                 /* Query the CA */
179                 uint32_t bsize = 0;
180                 ib_status = ib_query_ca(ctx->ca, NULL, &bsize);
181                 if(ib_status != IB_INSUFFICIENT_MEMORY)
182                 {
183                         fprintf(stderr, "Failed to query device props");
184                         return NULL;
185                 }
186
187                 ctx->ca_attr = (ib_ca_attr_t *)malloc(bsize);
188
189                 ib_status = ib_query_ca(ctx->ca, ctx->ca_attr, &bsize);
190                 if(ib_status != IB_SUCCESS)
191                 {
192                         printf("ib_query_ca failed with status = %d\n", ib_status);
193                         return NULL;
194                 }
195                 if (user_parm->mtu == 0) {/*user did not ask for specific mtu */
196                         if (ctx->ca_attr->dev_id == 23108) {
197                                 user_parm->mtu = 1024;
198                         } else {
199                                 user_parm->mtu = 2048;
200                         }
201                 }
202         }
203
204         ctx->channel = NULL;
205
206         ib_status = ib_alloc_pd(ctx->ca ,
207                                                 IB_PDT_NORMAL,
208                                                 ctx, //pd_context
209                                                 &ctx->pd);
210         if (ib_status != IB_SUCCESS) {
211                 fprintf(stderr, "Couldn't allocate PD\n");
212                 return NULL;
213         }
214
215
216         {
217                 ib_mr_create_t                  mr_create;
218                 ib_cq_create_t                  cq_create;
219                 /* We dont really want IBV_ACCESS_LOCAL_WRITE, but IB spec says:
220                  * The Consumer is not allowed to assign Remote Write or Remote Atomic to
221                  * a Memory Region that has not been assigned Local Write. */
222                 if (user_parm->connection_type==UD) {
223                         mr_create.length = (size + 40 ) * 2;
224                 } else {
225                         mr_create.length = size * 2;
226                 }
227                         
228                 mr_create.vaddr = ctx->buf;
229                 mr_create.access_ctrl = IB_AC_RDMA_WRITE| IB_AC_LOCAL_WRITE;
230                 
231                 ib_status = ib_reg_mem(ctx->pd ,&mr_create ,&ctx->lkey ,&ctx->rkey ,&ctx->mr);
232                 if (ib_status != IB_SUCCESS) {
233                         fprintf(stderr, "Couldn't allocate MR\n");
234                         return NULL;
235                 }
236
237                 if (user_parm->use_event) {
238                         cl_status_t cl_status;
239
240                         cl_status = cl_waitobj_create( FALSE, &ctx->cq_waitobj );
241                         if( cl_status != CL_SUCCESS ) {
242                                 ctx->cq_waitobj = NULL;
243                                 fprintf(stderr, "cl_waitobj_create() returned %s\n", CL_STATUS_MSG(cl_status) );
244                                 return NULL;
245                         }
246
247                         cq_create.h_wait_obj = ctx->cq_waitobj;
248                         cq_create.pfn_comp_cb = NULL;
249                 } else {
250                         cq_create.h_wait_obj = NULL;
251                         cq_create.pfn_comp_cb = pp_cq_comp_cb;
252                 }
253
254                 cq_create.size = user_parm->tx_depth*2;
255                 ib_status = ib_create_cq(ctx->ca,&cq_create ,ctx, NULL, &ctx->scq);
256                 if (ib_status != IB_SUCCESS) {
257                         fprintf(stderr, "Couldn't create CQ\n");
258                         fprintf(stderr, "ib_status = %d\n", ib_status);
259                         return NULL;
260                 }
261
262                 if (user_parm->use_event) {
263                         ib_status = ib_rearm_cq( ctx->scq, FALSE );
264                         if( ib_status )
265                         {
266                                 ib_destroy_cq( ctx->scq, NULL );
267                                 fprintf(stderr,"ib_rearm_cq returned %s\n", ib_get_err_str( ib_status ));
268                                 return NULL;
269                         }
270                 }
271         }
272
273         {
274                 ib_qp_create_t  qp_create;
275                 memset(&qp_create, 0, sizeof(ib_qp_create_t));
276                 qp_create.h_sq_cq       = ctx->scq;
277                 qp_create.h_rq_cq       = ctx->scq;
278                 qp_create.sq_depth      = user_parm->tx_depth;
279                 qp_create.rq_depth      = user_parm->tx_depth;
280                 qp_create.sq_sge        = 1;
281                 qp_create.rq_sge        = 1;
282                 //TODO MAX_INLINE
283
284                 switch (user_parm->connection_type) {
285                 case RC :
286                         qp_create.qp_type= IB_QPT_RELIABLE_CONN;
287                         break;
288                 case UC :
289                         qp_create.qp_type = IB_QPT_UNRELIABLE_CONN;
290                         break;
291                 case UD :
292                         qp_create.qp_type = IB_QPT_UNRELIABLE_DGRM;
293                         break;
294                 default:
295                         fprintf(stderr, "Unknown connection type %d \n",user_parm->connection_type);
296                         return NULL;
297                 }
298                 
299                 qp_create.sq_signaled = FALSE;
300                 /*attr.sq_sig_all = 0;*/
301
302                 ib_status = ib_create_qp(ctx->pd, &qp_create,NULL,NULL,&ctx->qp[0]);
303                 if (ib_status != IB_SUCCESS){
304                         fprintf(stderr, "Couldn't create QP\n");
305                         return NULL;
306                 }
307         }
308
309         {
310                 ib_qp_mod_t     qp_modify;
311                 ib_qp_attr_t    qp_attr;
312                 memset(&qp_modify, 0, sizeof(ib_qp_mod_t));
313                 qp_modify.req_state = IB_QPS_INIT;
314                 qp_modify.state.init.pkey_index = 0 ;
315                 qp_modify.state.init.primary_port = (uint8_t)port;
316                 if (user_parm->connection_type==UD) {
317                         qp_modify.state.init.qkey = 0x11111111;
318                 } else {
319                         qp_modify.state.init.access_ctrl = IB_AC_RDMA_WRITE | IB_AC_LOCAL_WRITE;
320                 }
321                 
322                 ib_status = ib_modify_qp(ctx->qp[0], &qp_modify);
323                 if (ib_status != IB_SUCCESS){
324                         fprintf(stderr, "Failed to modify QP to INIT\n");
325                         return NULL;
326                 }
327
328
329                 memset(&qp_attr, 0, sizeof(ib_qp_attr_t));
330                 ib_status = ib_query_qp(ctx->qp[0], &ctx->qp_attr[0]);
331                 if (ib_status != IB_SUCCESS){
332                         fprintf(stderr, "Failed to modify QP to INIT\n");
333                         return NULL;
334                 }
335                 fprintf(stderr, "max inline size %d\n",ctx->qp_attr[0].sq_max_inline);
336                 
337         }
338         return ctx;
339
340 }
341
342 static int pp_connect_ctx(struct pingpong_context *ctx, int port, int my_psn,
343                           struct pingpong_dest *dest, struct user_parameters *user_parm,int  index)
344 {
345
346         ib_api_status_t ib_status;
347         ib_qp_mod_t     attr;
348         memset(&attr, 0, sizeof(ib_qp_mod_t));
349
350         attr.req_state          = IB_QPS_RTR;
351         switch (user_parm->mtu) {
352         case 256 : 
353                 attr.state.rtr.primary_av.conn.path_mtu = IB_MTU_LEN_256;
354                 break;
355         case 512 :
356                 attr.state.rtr.primary_av.conn.path_mtu = IB_MTU_LEN_512;
357                 break;
358         case 1024 :
359                 attr.state.rtr.primary_av.conn.path_mtu = IB_MTU_LEN_1024;
360                 break;
361         case 2048 :
362                 attr.state.rtr.primary_av.conn.path_mtu = IB_MTU_LEN_2048;
363                 break;
364         }
365         printf("Mtu : %d\n", user_parm->mtu);
366         attr.state.rtr.dest_qp  = (dest->qpn);
367         attr.state.rtr.rq_psn   = (dest->psn);
368         if (user_parm->connection_type==RC) {
369                 attr.state.rtr.resp_res = 1;
370                 attr.state.rtr.rnr_nak_timeout = 12;
371         }
372         attr.state.rtr.primary_av.grh_valid = 0;
373         attr.state.rtr.primary_av.dlid = dest->lid;
374         attr.state.rtr.primary_av.sl = 0;
375         attr.state.rtr.primary_av.path_bits = 0;
376         attr.state.rtr.primary_av.port_num = (uint8_t)port;
377         attr.state.rtr.primary_av.static_rate = IB_PATH_RECORD_RATE_10_GBS;
378         attr.state.rtr.opts = IB_MOD_QP_LOCAL_ACK_TIMEOUT |
379                                         IB_MOD_QP_RESP_RES |
380                                         IB_MOD_QP_PRIMARY_AV;
381
382
383         ib_status = ib_modify_qp(ctx->qp[0], &attr);
384         if(ib_status != IB_SUCCESS){
385                         fprintf(stderr, "Failed to modify UC QP to RTR\n");
386                         return 1;
387         }
388
389         if (user_parm->connection_type == UD) {
390                 ib_av_attr_t    av_attr;
391
392                 av_attr.grh_valid = 0;
393                 av_attr.dlid = dest->lid;
394                 av_attr.sl = 0;
395                 av_attr.path_bits = 0;
396                 av_attr.port_num = (uint8_t)port;
397                 av_attr.static_rate = IB_PATH_RECORD_RATE_10_GBS;
398
399                 ib_status = ib_create_av(ctx->pd,&av_attr, &ctx->av);
400                 if (ib_status != IB_SUCCESS) {
401                         fprintf(stderr, "Failed to create AH for UD\n");
402                         return 1;
403                 }
404         }
405
406
407         memset(&attr, 0, sizeof(ib_qp_mod_t));
408         attr.req_state  = IB_QPS_RTS;
409         attr.state.rts.sq_psn = my_psn;
410
411         if (user_parm->connection_type == RC) {
412                 attr.state.rts.resp_res = 1;
413                 attr.state.rts.local_ack_timeout = 14;
414                 attr.state.rts.retry_cnt = 7;
415                 attr.state.rts.rnr_retry_cnt = 7;
416                 attr.state.rts.opts = IB_MOD_QP_RNR_RETRY_CNT |
417                                                 IB_MOD_QP_RETRY_CNT |
418                                                 IB_MOD_QP_LOCAL_ACK_TIMEOUT;
419                                                 
420         }       
421         ib_status = ib_modify_qp(ctx->qp[index], &attr);
422         if(ib_status != IB_SUCCESS){
423                 fprintf(stderr, "Failed to modify UC QP to RTS\n");
424                 return 1;
425         }
426
427         
428         
429         /* post receive max msg size*/
430         {
431                 int i;
432                 ib_recv_wr_t      *bad_wr_recv;
433                 //receive
434                 ctx->rwr.wr_id      = PINGPONG_RECV_WRID;
435                 ctx->rwr.ds_array = &ctx->recv_list;
436                 ctx->rwr.num_ds = 1;
437                 ctx->rwr.p_next = NULL;
438                 ctx->recv_list.vaddr = (uintptr_t) ctx->buf;
439                 if (user_parm->connection_type==UD) {
440                         ctx->recv_list.length = ctx->size + 40;
441                 } else {
442                         ctx->recv_list.length = ctx->size;
443                 }
444                 ctx->recv_list.lkey = ctx->lkey;
445                 for (i = 0; i < user_parm->tx_depth; ++i) {
446                         ib_status = ib_post_recv(ctx->qp[index], &ctx->rwr, &bad_wr_recv);
447                         if (ib_status != IB_SUCCESS)
448                         {
449                                 fprintf(stderr, "Couldn't post recv: counter=%d\n", i);
450                                 return 14;
451                         }
452                         PERF_DEBUG("rcnt = %d \n",i);
453                 }   
454         }
455         post_recv = user_parm->tx_depth;
456
457         return 0;
458 }
459
460 static SOCKET pp_open_port(struct pingpong_context *ctx, const char * servername,
461                         int ib_port, int port, struct pingpong_dest **p_my_dest,
462                         struct pingpong_dest **p_rem_dest,struct user_parameters *user_parm)
463 {
464         struct pingpong_dest    *my_dest;
465         struct pingpong_dest    *rem_dest;
466         SOCKET                          sockfd;
467         int                                     rc;
468         int                                     i;
469         int                                     numofqps = 1;
470         
471         /* Create connection between client and server.
472          * We do it by exchanging data over a TCP socket connection. */
473
474         
475         my_dest = malloc( sizeof (struct pingpong_dest) * numofqps);
476         if (!my_dest){
477                 perror("malloc");
478                 return INVALID_SOCKET;
479         }
480         memset( my_dest, 0, sizeof (struct pingpong_dest) * numofqps );
481
482         rem_dest = malloc(sizeof (struct pingpong_dest) * numofqps );
483         if (!rem_dest){
484                 perror("malloc");
485                 return INVALID_SOCKET;
486         }
487         memset( rem_dest, 0, sizeof (struct pingpong_dest) * numofqps );
488
489         sockfd = servername ? pp_client_connect(servername, port) :
490                 pp_server_connect(port);
491
492         if (sockfd  == INVALID_SOCKET) {
493                 printf("pp_connect_sock(%s,%d) failed (%d)!\n",
494                        servername, port, sockfd);
495                 return INVALID_SOCKET;
496         }
497
498         
499         for (i =0 ;i<numofqps;i ++) 
500         {
501                 /* Create connection between client and server.
502                 * We do it by exchanging data over a TCP socket connection. */
503                 
504                 my_dest[i].lid = ctx->ca_attr->p_port_attr[ib_port-1].lid;
505                 my_dest[i].psn = cl_hton32(rand() & 0xffffff);
506                 if (!my_dest[i].lid) {
507                         fprintf(stderr, "Local lid 0x0 detected. Is an SM running?\n");
508                         return 1;
509                 }
510                 my_dest[i].qpn = ctx->qp_attr[i].num;
511                 /* TBD this should be changed inot VA and different key to each qp */
512                 my_dest[i].rkey = ctx->rkey;
513                 my_dest[i].vaddr = (uintptr_t)ctx->buf + ctx->size;
514
515                 printf("  local address:  LID %#04x, QPN %#06x, PSN %#06x, "
516                 "RKey %#08x VAddr %#016Lx\n",
517                 my_dest[i].lid, my_dest[i].qpn, my_dest[i].psn,
518                 my_dest[i].rkey, my_dest[i].vaddr);
519
520                 rc = servername ? pp_client_exch_dest(sockfd, &my_dest[i],&rem_dest[i]):
521                                                 pp_server_exch_dest(sockfd, &my_dest[i],&rem_dest[i]);
522                 if (rc)
523                         return INVALID_SOCKET;
524                 printf("  remote address: LID %#04x, QPN %#06x, PSN %#06x, "
525                 "RKey %#08x VAddr %#016Lx\n",
526                 rem_dest[i].lid, rem_dest[i].qpn, rem_dest[i].psn,
527                 rem_dest[i].rkey, rem_dest[i].vaddr);
528
529                 if (pp_connect_ctx(ctx, ib_port, my_dest[i].psn, &rem_dest[i], user_parm, i))
530                         return INVALID_SOCKET;
531                 /* An additional handshake is required *after* moving qp to RTR.
532                 Arbitrarily reuse exch_dest for this purpose. */
533                 rc = servername ? pp_client_exch_dest(sockfd, &my_dest[i],&rem_dest[i]):
534                                                 pp_server_exch_dest(sockfd, &my_dest[i],&rem_dest[i]);
535                 if (rc)
536                         return INVALID_SOCKET;
537         }
538         *p_rem_dest = rem_dest;
539         *p_my_dest = my_dest;
540         return sockfd;
541 }
542
543
/*
 * Print the command-line synopsis and option summary to stdout.
 *
 * argv0  program name to show in the synopsis lines.
 */
static void usage(const char *argv0)
{
	printf("Usage:\n");
	printf("  %s            start a server and wait for connection\n", argv0);
	printf("  %s <host>     connect to server at <host>\n", argv0);
	printf("\n");
	printf("Options:\n");
	printf("  -p, --port=<port>         listen on/connect to port <port> (default 18515)\n");
	printf("  -i, --ib-port=<port>      use port <port> of IB device (default 1)\n");
	printf("  -c, --connection=<type>   connection type RC/UC/UD (default RC)\n");
	printf("  -m, --mtu=<mtu>           mtu size (default 1024)\n");
	printf("  -s, --size=<size>         size of message to exchange (default 65536)\n");
	printf("  -a, --all                 Run sizes from 2 till 2^23\n");
	printf("  -t, --tx-depth=<dep>      size of tx queue (default 300)\n");
	printf("  -n, --iters=<iters>       number of exchanges (at least 2, default 1000)\n");
	printf("  -b, --bidirectional       measure bidirectional bandwidth (default unidirectional)\n");
	printf("  -V, --version             display version number\n");
	printf("  -h                        display this help message\n");
}
562
563 static void print_report(unsigned int iters, unsigned size, int duplex,
564                          cycles_t *tposted, cycles_t *tcompleted)
565 {
566         double                  cycles_to_units;
567         uint64_t                        tsize;  /* Transferred size, in megabytes */
568         int                             i, j;
569         int                             opt_posted = 0, opt_completed = 0;
570         cycles_t                        opt_delta;
571         cycles_t                        t;
572
573
574         opt_delta = tcompleted[opt_posted] - tposted[opt_completed];
575
576         /* Find the peak bandwidth */
577         for (i = 0; i < (int)iters; ++i)
578                 for (j = i; j < (int)iters; ++j) {
579                         t = (tcompleted[j] - tposted[i]) / (j - i + 1);
580                         if (t < opt_delta) {
581                                 opt_delta  = t;
582                                 opt_posted = i;
583                                 opt_completed = j;
584                         }
585                 }
586
587         cycles_to_units = get_cpu_mhz();
588
589         tsize = duplex ? 2 : 1;
590         tsize = tsize * size;
591         printf("%7d        %d            %7.2f               %7.2f \n",
592                size,iters,tsize * cycles_to_units / opt_delta / 0x100000,
593                 (uint64_t)tsize * iters * cycles_to_units /(tcompleted[iters - 1] - tposted[0]) / 0x100000);
594 }
595
596
597 int run_iter_bi(struct pingpong_context *ctx, struct user_parameters *user_param,
598                 struct pingpong_dest *rem_dest, int size)
599 {
600
601         ib_qp_handle_t  qp;
602         int                             scnt, ccnt, rcnt;
603         ib_recv_wr_t            *bad_wr_recv;
604         ib_api_status_t ib_status;
605
606         /*********************************************
607          * Important note :
608          * In case of UD/UC this is NOT the way to measure
609          * BW since we are running with loop on the send side
610          * while we should run on the receive side or enable retry in SW
611          * Since the sender may be faster than the reciver than although
612          * we had posted receive it is not enough and might end this will
613          * result in deadlock of test since both sides are stuck on poll cq
614          * In this test i do not solve this for the general test ,need to write
615          * separate test for UC/UD but in case the tx_depth is ~1/3 from the
616          * number of iterations this should be ok .
617          * Also note that the sender is limited in the number of send, ans
618          * i try to make the receiver full 
619          *********************************************/
620         /* send */
621         if (user_param->connection_type==UD) {
622                 ctx->list.vaddr = (uintptr_t) ctx->buf + 40;
623                 ctx->wr.dgrm.ud.h_av = ctx->av;
624                 ctx->wr.dgrm.ud.remote_qp  = rem_dest->qpn;
625                 ctx->wr.dgrm.ud.remote_qkey = 0x11111111;
626         } else {
627                 ctx->list.vaddr = (uintptr_t) ctx->buf;
628         }
629         ctx->list.lkey = ctx->lkey;
630         ctx->wr.wr_id = PINGPONG_SEND_WRID;
631         ctx->wr.ds_array = &ctx->list;
632         ctx->wr.num_ds = 1;
633         ctx->wr.wr_type = WR_SEND;
634         ctx->wr.p_next       = NULL;
635         
636         if ((uint32_t)size > ctx->qp_attr[0].sq_max_inline) { /*complaince to perf_main */
637                 ctx->wr.send_opt = IB_SEND_OPT_SIGNALED;
638         } else {
639                 ctx->wr.send_opt = IB_SEND_OPT_SIGNALED | IB_SEND_OPT_INLINE;
640         }
641         
642         ctx->list.length = size;
643         scnt = 0;
644         ccnt = 0;
645         rcnt = 0;
646         qp = ctx->qp[0];
647
648         while (ccnt < user_param->iters || rcnt < user_param->iters ) {
649                 while (scnt < user_param->iters && (scnt - ccnt) < user_param->tx_depth / 2) {
650                         ib_send_wr_t    *bad_wr;
651                         tposted[scnt] = get_cycles();
652                         ib_status = ib_post_send(qp, &ctx->wr, &bad_wr);
653                         if (ib_status != IB_SUCCESS) {
654                                 fprintf(stderr, "Couldn't post send: scnt=%d ib_status %d\n",
655                                         scnt,ib_status);
656                                 return 1;
657                         }
658                         ++scnt;
659                         PERF_DEBUG("scnt = %d \n",scnt);
660                 }
661
662                 {
663                         ib_wc_t wc;
664                         ib_wc_t *p_wc_done,*p_wc_free;
665
666
667                         p_wc_free = &wc;
668                         p_wc_free->p_next = NULL;
669                         p_wc_done = NULL;
670
671                         if (user_param->use_event) {
672                                 cl_status_t     cl_status;
673
674                                 PERF_DEBUG("%s:%d IN cl_waitobj_wait_on", __FUNCTION__, __LINE__);
675                                 cl_status = cl_waitobj_wait_on( ctx->cq_waitobj, EVENT_NO_TIMEOUT, TRUE );
676                                 if( cl_status != CL_SUCCESS )
677                                 {
678                                         fprintf(stderr, "cl_waitobj_wait_on() (%d)\n", cl_status);
679                                         return 1;
680                                 }
681                         } 
682
683                         do {
684                                 ib_status = ib_poll_cq(ctx->scq, &p_wc_free, &p_wc_done);
685                                 if (ib_status == IB_SUCCESS ) {
686                                         if (p_wc_done->status != IB_WCS_SUCCESS) {
687                                                 fprintf(stderr, "Completion wth error at %s:\n",
688                                                 user_param->servername ? "client" : "server");
689                                                 fprintf(stderr, "Failed status %d: wr_id %d syndrom 0x%x\n",
690                                                 p_wc_done->status, (int) p_wc_done->wr_id, p_wc_done->vendor_specific);
691                                                 fprintf(stderr, "scnt=%d, ccnt=%d\n",
692                                                 scnt, ccnt);
693                                                 return 1;
694                                         }
695
696                                         switch ((int) p_wc_done->wr_id) {
697                                         case PINGPONG_SEND_WRID:
698                                                 tcompleted[ccnt] = get_cycles();
699                                                 ++ccnt;
700                                                 break;
701                                         case PINGPONG_RECV_WRID:
702                                                 if (--post_recv <= user_param->tx_depth - 2) {
703                                                         while (rcnt < user_param->iters && (user_param->tx_depth - post_recv) > 0 ) {
704                                                                 post_recv++;
705                                                                 ib_status = ib_post_recv(ctx->qp[0], &ctx->rwr, &bad_wr_recv);
706                                                                 if (ib_status != IB_SUCCESS)
707                                                                 {
708                                                                         fprintf(stderr, "Couldn't post recv: rcnt=%d\n",
709                                                                         rcnt);
710                                                                         return 15;
711                                                                 }
712                                                         }
713                                                 }
714                                                 ++rcnt;
715                                                 break;
716                                         default:
717                                                 fprintf(stderr, "Completion for unknown wr_id %d\n",
718                                                         (int) wc.wr_id);
719                                                 break;
720                                         }
721                                         p_wc_free = p_wc_done;
722                                         p_wc_free->p_next = NULL;
723                                         p_wc_done = NULL;
724                                         PERF_DEBUG("ccnt = %d \n",ccnt);
725                                         PERF_DEBUG("rcnt = %d \n",rcnt);
726                                 }
727                         } while (ib_status == IB_SUCCESS );
728
729                         if (ib_status != IB_NOT_FOUND) {
730                                 fprintf(stderr, "poll CQ failed %d\n", ib_status);
731                                 return 1;
732                         }
733
734                         if (user_param->use_event) {
735                                 ib_status = ib_rearm_cq( ctx->scq, FALSE );
736                                 if( ib_status )
737                                 {
738                                         ib_destroy_cq( ctx->scq, NULL );
739                                         fprintf(stderr,"ib_rearm_cq returned %s\n", ib_get_err_str( ib_status ));
740                                         return 1;
741                                 }
742                         }
743                 }
744         }
745         return(0);
746 }
747
748
749 int run_iter_uni(struct pingpong_context *ctx, struct user_parameters *user_param,
750                  struct pingpong_dest *rem_dest, int size)
751 {
752
753         ib_qp_handle_t  qp;
754         int                             scnt, ccnt, rcnt;
755         ib_recv_wr_t            *bad_wr_recv;
756         ib_api_status_t ib_status;
757
758
759         /* send */
760         if (user_param->connection_type==UD) {
761                 ctx->list.vaddr = (uintptr_t) ctx->buf + 40;
762                 ctx->wr.dgrm.ud.h_av = ctx->av;
763                 ctx->wr.dgrm.ud.remote_qp  = rem_dest->qpn;
764                 ctx->wr.dgrm.ud.remote_qkey = 0x11111111;
765         } else {
766                 ctx->list.vaddr = (uintptr_t) ctx->buf;
767         }
768         ctx->list.lkey = ctx->lkey;
769         ctx->wr.wr_id = PINGPONG_SEND_WRID;
770         ctx->wr.ds_array = &ctx->list;
771         ctx->wr.num_ds = 1;
772         ctx->wr.wr_type = WR_SEND;
773         ctx->wr.p_next       = NULL;
774
775         
776         if ((uint32_t)size > ctx->qp_attr[0].sq_max_inline) { /*complaince to perf_main */
777                 ctx->wr.send_opt = IB_SEND_OPT_SIGNALED;
778         } else {
779                 ctx->wr.send_opt = IB_SEND_OPT_SIGNALED | IB_SEND_OPT_INLINE;
780         }
781         ctx->list.length = size;
782         
783         scnt = 0;
784         ccnt = 0;
785         rcnt = 0;
786         qp = ctx->qp[0];
787         if (!user_param->servername) {
788                 while (rcnt < user_param->iters) {
789                         ib_wc_t wc;
790                         ib_wc_t *p_wc_done,*p_wc_free;
791
792                         p_wc_free = &wc;
793                         p_wc_done = NULL;
794                         p_wc_free->p_next = NULL;
795                         
796                         /*Server is polling on receive first */
797                         if (user_param->use_event) {
798                                 cl_status_t     cl_status;
799
800                                 PERF_DEBUG("%s:%d IN cl_waitobj_wait_on\n", __FUNCTION__, __LINE__);
801                                 cl_status = cl_waitobj_wait_on( ctx->cq_waitobj, EVENT_NO_TIMEOUT, TRUE );
802                                 if( cl_status != CL_SUCCESS )
803                                 {
804                                         fprintf(stderr, "cl_waitobj_wait_on() (%d)\n", cl_status);
805                                         return 1;
806                                 }
807                                 PERF_DEBUG("%s:%d OUT cl_waitobj_wait_on\n", __FUNCTION__, __LINE__);
808                         } 
809
810                         do {
811
812                                 ib_status = ib_poll_cq(ctx->scq, &p_wc_free, &p_wc_done);
813                                 if (ib_status == IB_SUCCESS) {
814                                         tcompleted[ccnt] = get_cycles();
815                                         if (p_wc_done->status != IB_WCS_SUCCESS) {
816                                                 fprintf(stderr, "Completion wth error at %s:\n",
817                                                 user_param->servername ? "client" : "server");
818                                                 fprintf(stderr, "Failed status %d: wr_id %d syndrom 0x%x\n",
819                                                 p_wc_done->status, (int) p_wc_done->wr_id, p_wc_done->vendor_specific);
820                                                 fprintf(stderr, "scnt=%d, ccnt=%d\n",
821                                                 scnt, ccnt);
822                                                 return 1;
823                                         }
824                                         
825                                         ib_status = ib_post_recv(ctx->qp[0], &ctx->rwr, &bad_wr_recv);
826                                         if (ib_status != IB_SUCCESS)
827                                         {
828                                                 fprintf(stderr, "Couldn't post recv: rcnt=%d\n",
829                                                 rcnt);
830                                                 return 15;
831                                         }
832                                         ++rcnt;
833                                         ++ccnt;
834                                         PERF_DEBUG("ccnt = %d \n",ccnt);
835                                         PERF_DEBUG("rcnt = %d \n",rcnt);
836
837                                         p_wc_free = p_wc_done;
838                                         p_wc_free->p_next = NULL;
839                                         p_wc_done = NULL;
840                                 }
841                         } while (ib_status == IB_SUCCESS);
842                         if (ib_status != IB_NOT_FOUND) {
843                                 fprintf(stderr, "Poll Receive CQ failed %d\n", ib_status);
844                                 return 12;
845                         }
846
847                         if (user_param->use_event) {
848                                 ib_status = ib_rearm_cq( ctx->scq, FALSE );
849                                 if( ib_status )
850                                 {
851                                         ib_destroy_cq( ctx->scq, NULL );
852                                         fprintf(stderr,"ib_rearm_cq returned %s\n", ib_get_err_str( ib_status ));
853                                         return 1;
854                                 }
855                         }
856
857                 }
858         } else {
859                 /* client is posting and not receiving. */
860                 while (scnt < user_param->iters || ccnt < user_param->iters) {
861                         while (scnt < user_param->iters && (scnt - ccnt) < user_param->tx_depth ) {
862                                 ib_send_wr_t    *bad_wr;
863
864                                 tposted[scnt] = get_cycles();
865                                 ib_status = ib_post_send(qp, &ctx->wr, &bad_wr);
866                                 if (ib_status != IB_SUCCESS) {
867                                         fprintf(stderr, "Couldn't post send: scnt=%d ib_status %d\n",
868                                                 scnt,ib_status);
869                                         return 1;
870                                 }
871                                 ++scnt;
872                                 PERF_DEBUG("scnt = %d \n",scnt);
873                         }
874                         if (ccnt < user_param->iters) {
875                                 ib_wc_t wc;
876                                 ib_wc_t *p_wc_done,*p_wc_free;
877
878
879                                 p_wc_free = &wc;
880                                 p_wc_free->p_next = NULL;
881                                 p_wc_done = NULL;
882
883                                 if ( (user_param->use_event) ) {
884                                         cl_status_t     cl_status;
885
886                                         PERF_DEBUG("%s:%d IN cl_waitobj_wait_on\n", __FUNCTION__, __LINE__);
887                                         cl_status = cl_waitobj_wait_on( ctx->cq_waitobj, EVENT_NO_TIMEOUT, TRUE );
888                                         if( cl_status != CL_SUCCESS )
889                                         {
890                                                 fprintf(stderr, "cl_waitobj_wait_on() (%d)\n", cl_status);
891                                                 return 1;
892                                         }
893                                         PERF_DEBUG("%s:%d OUT cl_waitobj_wait_on\n", __FUNCTION__, __LINE__);
894                                 } 
895
896                                 do {
897                                         ib_status = ib_poll_cq(ctx->scq, &p_wc_free, &p_wc_done);
898                                         if (ib_status == IB_SUCCESS ) {
899                                                 tcompleted[ccnt] = get_cycles();
900                                                 if (p_wc_done->status != IB_WCS_SUCCESS) {
901                                                         fprintf(stderr, "Completion wth error at %s:\n",
902                                                         user_param->servername ? "client" : "server");
903                                                         fprintf(stderr, "Failed status %d: wr_id %d syndrom 0x%x\n",
904                                                         p_wc_done->status, (int) p_wc_done->wr_id, p_wc_done->vendor_specific);
905                                                         fprintf(stderr, "scnt=%d, ccnt=%d\n",
906                                                         scnt, ccnt);
907                                                         return 1;
908                                                 }
909                                                 ccnt ++;
910                                                 p_wc_free = p_wc_done;
911                                                 p_wc_free->p_next = NULL;
912                                                 p_wc_done = NULL;
913                                         }
914                                         } while (ib_status == IB_SUCCESS );
915                                 if (ib_status != IB_NOT_FOUND) {
916                                         fprintf(stderr, "poll CQ failed %d\n", ib_status);
917                                         return 1;
918                                 }
919
920                                 if ( (user_param->use_event) ) {
921                                         ib_status = ib_rearm_cq( ctx->scq, FALSE );
922                                         if( ib_status )
923                                         {
924                                                 ib_destroy_cq( ctx->scq, NULL );
925                                                 fprintf(stderr,"ib_rearm_cq returned %s\n", ib_get_err_str( ib_status ));
926                                                 return 1;
927                                         }
928                                 }
929
930                                 PERF_DEBUG("ccnt = %d \n",ccnt);
931                         }
932                 }
933         }
934         return(0);
935 }
936
937
938 int __cdecl main(int argc, char *argv[])
939 {
940         struct pingpong_context         *ctx;
941         struct pingpong_dest            *my_dest;
942         struct pingpong_dest            *rem_dest;
943         struct user_parameters          user_param;
944         char                            *ib_devname = NULL;
945         int                             port = 18515;
946         int                             ib_port = 1;
947         unsigned                        size = 65536;
948         SOCKET                          sockfd = INVALID_SOCKET;
949         int                             i = 0;
950         int                             size_max_pow = 24;
951         WSADATA                         wsaData;
952         int                             iResult;
953
954
955         /* init default values to user's parameters */
956         memset(&user_param, 0, sizeof(struct user_parameters));
957         user_param.mtu = 0;
958         user_param.iters = 1000;
959         user_param.tx_depth = 1000;
960         user_param.servername = NULL;
961         user_param.use_event = 0;
962         user_param.duplex = 0;
963         /* Parameter parsing. */
964         while (1) {
965                 int c;
966
967                 static struct option long_options[] = {
968                         { "port",                       1,      NULL,   'p' },
969                         { "ib-dev",             1,      NULL,   'd' },
970                         { "ib-port",            1,      NULL,   'i' },
971                         { "mtu",                        1,      NULL,   'm' },
972                         { "connection", 1,      NULL,   'c' },
973                         { "size",                       1,      NULL,   's' },
974                         { "iters",                      1,      NULL,   'n' },
975                         { "tx-depth",           1,      NULL,   't' },
976                         { "all",                        0,      NULL,   'a' },
977                         { "bidirectional",      0,      NULL,   'b' },
978                         { "version",            0,      NULL,   'V' },
979                         { "events",             0,      NULL,   'e' },
980                         { 0 }
981                 };
982
983                 c = getopt_long(argc, argv, "p:d:i:m:c:s:n:t:ebaVh", long_options, NULL);
984                 if (c == -1)
985                         break;
986
987                 switch (c) {
988                 case 'p':
989                         port = strtol(optarg, NULL, 0);
990                         if (port < 0 || port > 65535) {
991                                 usage(argv[0]);
992                                 return 1;
993                         }
994                         break;
995                 case 'e':
996                         ++user_param.use_event;
997                         break;
998                 case 'd':
999                         ib_devname = _strdup(optarg);
1000                         break;
1001                 case 'c':
1002                         if (strcmp("UC",optarg)==0)
1003                                 user_param.connection_type=UC;
1004                         if (strcmp("UD",optarg)==0)
1005                                 user_param.connection_type=UD;
1006                         break;
1007
1008                 case 'm':
1009                         user_param.mtu = strtol(optarg, NULL, 0);
1010                         break;
1011                 case 'a':
1012                         user_param.all = ALL;
1013                         break;
1014                 case 'V':
1015                         printf("send_bw version : %.2f\n",VERSION);
1016                         return 0;
1017                         break;
1018                 case 'i':
1019                         ib_port = strtol(optarg, NULL, 0);
1020                         if (ib_port <= 0) {
1021                                 usage(argv[0]);
1022                                 return 1;
1023                         }
1024                         break;
1025
1026                 case 's':
1027                         size = (unsigned)_strtoui64(optarg, NULL, 0);
1028                         if (size < 1 || size > UINT_MAX / 2) {
1029                                 usage(argv[0]);
1030                                 return 1;
1031                         }
1032
1033                         break;
1034
1035                 case 't':
1036                         user_param.tx_depth = strtol(optarg, NULL, 0);
1037                         if (user_param.tx_depth < 1) { usage(argv[0]); return 1; }
1038                         break;
1039
1040                 case 'n':
1041                         user_param.iters = strtol(optarg, NULL, 0);
1042                         if (user_param.iters < 2) {
1043                                 usage(argv[0]);
1044                                 return 1;
1045                         }
1046
1047                         break;
1048
1049                 case 'b':
1050                         user_param.duplex = 1;
1051                         break;
1052                 case 'h':
1053                 default:
1054                         usage(argv[0]);
1055                         return 1;
1056                 }
1057         }
1058
1059         if (optind == argc - 1)
1060                 user_param.servername = _strdup(argv[optind]);
1061         else if (optind < argc) {
1062                 usage(argv[0]);
1063                 return 1;
1064         }
1065         printf("------------------------------------------------------------------\n");
1066         if (user_param.duplex == 1) 
1067                 printf("                    Send Bidirectional BW Test\n");
1068         else 
1069                 printf("                    Send BW Test\n");
1070
1071         printf("Inline data is used up to 400 bytes message\n");
1072         if (user_param.connection_type==RC) {
1073                 printf("Connection type : RC\n");
1074         } else if (user_param.connection_type==UC) { 
1075                 printf("Connection type : UC\n");
1076         } else {
1077                 printf("Connection type : UD\n");
1078         }
1079
1080         /* Done with parameter parsing. Perform setup. */
1081
1082         // Initialize Winsock
1083         iResult = WSAStartup(MAKEWORD(2,2), &wsaData);
1084         if (iResult != NO_ERROR) {
1085                 printf("Error at WSAStartup()\n");
1086                 return 1;
1087         }
1088         
1089         if (user_param.all == ALL && user_param.connection_type!=UD) {
1090                 /*since we run all sizes */
1091                 printf("test\n");
1092                 size = 8388608; /*2^23 */
1093         } else if (user_param.connection_type==UD ) {
1094                 printf("Max msg size in UD is 2048 changing to 2048\n");
1095                 size = 2048;
1096         }
1097         
1098         
1099         srand(GetCurrentProcessId() * GetTickCount());
1100
1101         //TODO: get pagesize from sysinfo
1102         page_size = 4096;
1103
1104         //TODO:get the device names
1105         
1106
1107         // init the context
1108         ctx = pp_init_ctx(size, ib_port, &user_param);
1109         if (!ctx)
1110                 return 1;
1111         
1112         sockfd = pp_open_port(ctx, user_param.servername, ib_port, port,&my_dest,&rem_dest,&user_param);
1113         if (sockfd == INVALID_SOCKET)
1114                 return 9;
1115         
1116         if (user_param.use_event) {
1117                 printf("Test with events.\n");
1118         }
1119
1120         printf("------------------------------------------------------------------\n");
1121         printf(" #bytes #iterations    BW peak[MB/sec]    BW average[MB/sec]  \n");
1122
1123         tposted = malloc(user_param.iters * sizeof *tposted);
1124
1125         if (!tposted) {
1126                 perror("malloc");
1127                 return 1;
1128         }
1129
1130         tcompleted = malloc(user_param.iters * sizeof *tcompleted);
1131
1132         if (!tcompleted) {
1133                 perror("malloc");
1134                 return 1;
1135         }
1136         
1137
1138         if (user_param.all == ALL) {
1139                 if (user_param.connection_type==UD) {
1140                         size_max_pow = 12;
1141                 }
1142                 for (i = 1; i < size_max_pow ; ++i) {
1143                         size = 1 << i;
1144                         if (user_param.duplex) {
1145                                 if(run_iter_bi(ctx, &user_param, rem_dest, size))
1146                                         return 17;
1147                         } else {
1148                                 if(run_iter_uni(ctx, &user_param, rem_dest, size))
1149                                         return 17;
1150                         }
1151                         if (user_param.servername) {
1152                                 print_report(user_param.iters, size, user_param.duplex, tposted, tcompleted);
1153                                 /* sync again for the sake of UC/UC */
1154                                 if(pp_client_exch_dest(sockfd, my_dest,rem_dest))
1155                                         return 19;
1156                                         
1157                         } else {
1158                                 if(pp_server_exch_dest(sockfd,my_dest,rem_dest))
1159                                         return 19;
1160                                         
1161                         }
1162
1163                 }
1164         } else {
1165                 if (user_param.duplex) {
1166                         if(run_iter_bi(ctx, &user_param,rem_dest, size))
1167                                 return 18;
1168                 } else {
1169                         if(run_iter_uni(ctx, &user_param,rem_dest, size))
1170                                 return 18;
1171                 }
1172                 if (user_param.servername) {
1173                         print_report(user_param.iters, size, user_param.duplex, tposted, tcompleted);
1174                 }
1175         }
1176
1177         /* close sockets */
1178         if (user_param.servername) {
1179                 pp_client_exch_dest(sockfd, my_dest,rem_dest);
1180         } else {
1181                 pp_server_exch_dest(sockfd, my_dest,rem_dest);
1182         }
1183
1184         send(sockfd, "done", sizeof "done",0);
1185         closesocket(sockfd);
1186
1187         free(tposted);
1188         free(tcompleted);
1189
1190         printf("------------------------------------------------------------------\n");
1191         goto end;
1192
1193
1194 end:
1195     if (user_param.use_event) {
1196                 cl_status_t cl_status;
1197
1198                 cl_status = cl_waitobj_destroy( ctx->cq_waitobj );
1199                 if( cl_status != CL_SUCCESS )
1200                 {
1201                         fprintf (stderr,
1202                                 "cl_waitobj_destroy() returned %s\n", CL_STATUS_MSG(cl_status));
1203                 }
1204         }
1205
1206         WSACleanup();
1207         return 0;
1208 }