librdmacm: use comp_channel to enhance scalability.
[mirror/winof/.git] / ulp / librdmacm / src / cma.cpp
1 /*\r
2  * Copyright (c) 2005-2009 Intel Corporation.  All rights reserved.\r
3  *\r
4  * This software is available to you under the OpenIB.org BSD license\r
5  * below:\r
6  *\r
7  *     Redistribution and use in source and binary forms, with or\r
8  *     without modification, are permitted provided that the following\r
9  *     conditions are met:\r
10  *\r
11  *      - Redistributions of source code must retain the above\r
12  *        copyright notice, this list of conditions and the following\r
13  *        disclaimer.\r
14  *\r
15  *      - Redistributions in binary form must reproduce the above\r
16  *        copyright notice, this list of conditions and the following\r
17  *        disclaimer in the documentation and/or other materials\r
18  *        provided with the distribution.\r
19  *\r
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,\r
21  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\r
22  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\r
23  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS\r
24  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN\r
25  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN\r
26  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE\r
27  * SOFTWARE.\r
28  */\r
29 \r
30 #include <windows.h>\r
31 #include <winsock2.h>\r
32 #include <stdio.h>\r
33 #include <iphlpapi.h>\r
34 \r
35 #include <rdma/rdma_cma.h>\r
36 #include <infiniband/verbs.h>\r
37 #include <comp_channel.h>\r
38 #include <iba/ibat.h>\r
39 #include "cma.h"\r
40 #include "..\..\..\etc\user\comp_channel.cpp"\r
41 \r
42 static struct ibv_windata windata;\r
43 \r
44 enum cma_state\r
45 {\r
46         cma_idle,\r
47         cma_listening,\r
48         cma_get_request,\r
49         cma_addr_resolve,\r
50         cma_route_resolve,\r
51         cma_passive_connect,\r
52         cma_active_connect,\r
53         cma_active_accept,\r
54         cma_accepting,\r
55         cma_connected,\r
56         cma_active_disconnect,\r
57         cma_passive_disconnect,\r
58         cma_disconnected\r
59 };\r
60 \r
61 #define CMA_DEFAULT_BACKLOG             16\r
62 \r
63 struct cma_id_private\r
64 {\r
65         struct rdma_cm_id                       id;\r
66         enum cma_state                          state;\r
67         struct cma_device                       *cma_dev;\r
68         int                                                     backlog;\r
69         int                                                     index;\r
70         struct rdma_cm_id                       **req_list;\r
71 };\r
72 \r
73 struct cma_device\r
74 {\r
75         struct ibv_context      *verbs;\r
76         uint64_t                        guid;\r
77         int                                     port_cnt;\r
78         uint8_t                         max_initiator_depth;\r
79         uint8_t                         max_responder_resources;\r
80 };\r
81 \r
82 struct cma_event {\r
83         struct rdma_cm_event    event;\r
84         uint8_t                                 private_data[56];\r
85         struct cma_id_private   *id_priv;\r
86 };\r
87 \r
88 static struct cma_device *cma_dev_array;\r
89 static int cma_dev_cnt;\r
90 \r
91 static void ucma_cleanup(void)\r
92 {\r
93         if (cma_dev_cnt > 0) {\r
94                 while (cma_dev_cnt > 0) {\r
95                         ibv_close_device(cma_dev_array[--cma_dev_cnt].verbs);\r
96                 }\r
97                 delete cma_dev_array;\r
98                 cma_dev_cnt = 0;\r
99         }\r
100         if (windata.prov != NULL) {\r
101                 ibv_release_windata(&windata, IBV_WINDATA_VERSION);\r
102                 windata.prov = NULL;\r
103         }\r
104 }\r
105 \r
106 static int ucma_init(void)\r
107 {\r
108         struct ibv_device **dev_list = NULL;\r
109         struct cma_device *cma_dev;\r
110         struct ibv_device_attr attr;\r
111         int i, ret;\r
112 \r
113         EnterCriticalSection(&lock);\r
114         if (cma_dev_cnt > 0) {\r
115                 goto out;\r
116         }\r
117 \r
118         ret = ibv_get_windata(&windata, IBV_WINDATA_VERSION);\r
119         if (ret) {\r
120                 goto err;\r
121         }\r
122 \r
123         dev_list = ibv_get_device_list(&cma_dev_cnt);\r
124         if (dev_list == NULL) {\r
125                 ret = -1;\r
126                 goto err;\r
127         }\r
128 \r
129         cma_dev_array = new struct cma_device[cma_dev_cnt];\r
130         if (cma_dev_array == NULL) {\r
131                 ret = -1;\r
132                 goto err;\r
133         }\r
134 \r
135         for (i = 0; dev_list[i]; ++i) {\r
136                 cma_dev = &cma_dev_array[i];\r
137 \r
138                 cma_dev->guid = ibv_get_device_guid(dev_list[i]);\r
139                 cma_dev->verbs = ibv_open_device(dev_list[i]);\r
140                 if (cma_dev->verbs == NULL) {\r
141                         ret = -1;\r
142                         goto err;\r
143                 }\r
144 \r
145                 ret = ibv_query_device(cma_dev->verbs, &attr);\r
146                 if (ret) {\r
147                         goto err;\r
148                 }\r
149 \r
150                 cma_dev->port_cnt = attr.phys_port_cnt;\r
151                 cma_dev->max_initiator_depth = (uint8_t) attr.max_qp_init_rd_atom;\r
152                 cma_dev->max_responder_resources = (uint8_t) attr.max_qp_rd_atom;\r
153         }\r
154         ibv_free_device_list(dev_list);\r
155 out:\r
156         LeaveCriticalSection(&lock);\r
157         return 0;\r
158 \r
159 err:\r
160         ucma_cleanup();\r
161         LeaveCriticalSection(&lock);\r
162         if (dev_list) {\r
163                 ibv_free_device_list(dev_list);\r
164         }\r
165         return ret;\r
166 }\r
167 \r
168 __declspec(dllexport)\r
169 struct ibv_context **rdma_get_devices(int *num_devices)\r
170 {\r
171         struct ibv_context **devs = NULL;\r
172         int i;\r
173 \r
174         if (!cma_dev_cnt && ucma_init()) {\r
175                 goto out;\r
176         }\r
177 \r
178         devs = new struct ibv_context *[cma_dev_cnt + 1];\r
179         if (devs == NULL) {\r
180                 goto out;\r
181         }\r
182 \r
183         for (i = 0; i < cma_dev_cnt; i++) {\r
184                 devs[i] = cma_dev_array[i].verbs;\r
185         }\r
186         devs[i] = NULL;\r
187 out:\r
188         if (num_devices != NULL) {\r
189                 *num_devices = devs ? cma_dev_cnt : 0;\r
190         }\r
191         return devs;\r
192 }\r
193 \r
194 __declspec(dllexport)\r
195 void rdma_free_devices(struct ibv_context **list)\r
196 {\r
197         delete list;\r
198 }\r
199 \r
200 __declspec(dllexport)\r
201 struct rdma_event_channel *rdma_create_event_channel(void)\r
202 {\r
203         struct rdma_event_channel *channel;\r
204 \r
205         if (!cma_dev_cnt && ucma_init()) {\r
206                 return NULL;\r
207         }\r
208 \r
209         channel = new struct rdma_event_channel;\r
210         if (channel == NULL) {\r
211                 return NULL;\r
212         }\r
213 \r
214         CompChannelInit(windata.comp_mgr, &channel->channel, INFINITE);\r
215         return channel;\r
216 }\r
217 \r
218 __declspec(dllexport)\r
219 void rdma_destroy_event_channel(struct rdma_event_channel *channel)\r
220 {\r
221         CompChannelCleanup(&channel->channel);\r
222         delete channel;\r
223 }\r
224 \r
225 __declspec(dllexport)\r
226 int rdma_create_id(struct rdma_event_channel *channel,\r
227                                    struct rdma_cm_id **id, void *context,\r
228                                    enum rdma_port_space ps)\r
229 {\r
230         struct cma_id_private *id_priv;\r
231         HRESULT hr;\r
232 \r
233         hr = cma_dev_cnt ? 0 : ucma_init();\r
234         if (hr) {\r
235                 return hr;\r
236         }\r
237 \r
238         id_priv = new struct cma_id_private;\r
239         if (id_priv == NULL) {\r
240                 return NULL;\r
241         }\r
242 \r
243         RtlZeroMemory(id_priv, sizeof(struct cma_id_private));\r
244         id_priv->id.context = context;\r
245         id_priv->id.channel = channel;\r
246         id_priv->id.ps = ps;\r
247         CompEntryInit(&channel->channel, &id_priv->id.comp_entry);\r
248 \r
249         if (ps == RDMA_PS_TCP) {\r
250                 hr = windata.prov->CreateConnectEndpoint(&id_priv->id.ep.connect);\r
251         } else {\r
252                 hr = windata.prov->CreateDatagramEndpoint(&id_priv->id.ep.datagram);\r
253         }\r
254         if (FAILED(hr)) {\r
255                 goto err;\r
256         }\r
257 \r
258         *id = &id_priv->id;\r
259         return 0;\r
260 \r
261 err:\r
262         delete id_priv;\r
263         return hr;\r
264 }\r
265 \r
266 static void ucma_destroy_listen(struct cma_id_private *id_priv)\r
267 {\r
268         while (--id_priv->backlog >= 0) {\r
269                 if (id_priv->req_list[id_priv->backlog] != NULL) {\r
270                         rdma_destroy_id(id_priv->req_list[id_priv->backlog]);\r
271                 }\r
272         }\r
273 \r
274         delete id_priv->req_list;\r
275 }\r
276 \r
277 __declspec(dllexport)\r
278 int rdma_destroy_id(struct rdma_cm_id *id)\r
279 {\r
280         struct cma_id_private *id_priv;\r
281 \r
282         id_priv = CONTAINING_RECORD(id, struct cma_id_private, id);\r
283         if (id->ps == RDMA_PS_TCP) {\r
284                 id->ep.connect->CancelOverlappedRequests();\r
285         } else {\r
286                 id->ep.datagram->CancelOverlappedRequests();\r
287         }\r
288 \r
289         CompChannelRemoveEntry(&id->channel->channel, &id->comp_entry);\r
290 \r
291         if (id_priv->backlog > 0) {\r
292                 ucma_destroy_listen(id_priv);\r
293         }\r
294 \r
295         if (id_priv->id.ps == RDMA_PS_TCP) {\r
296                 id_priv->id.ep.connect->Release();\r
297         } else {\r
298                 id_priv->id.ep.datagram->Release();\r
299         }\r
300 \r
301         delete id_priv;\r
302         return 0;\r
303 }\r
304 \r
305 static int ucma_addrlen(struct sockaddr *addr)\r
306 {\r
307         if (addr->sa_family == PF_INET) {\r
308                 return sizeof(struct sockaddr_in);\r
309         } else {\r
310                 return sizeof(struct sockaddr_in6);\r
311         }\r
312 }\r
313 \r
314 static int ucma_get_device(struct cma_id_private *id_priv, uint64_t guid)\r
315 {\r
316         struct cma_device *cma_dev;\r
317         int i;\r
318 \r
319         for (i = 0; i < cma_dev_cnt; i++) {\r
320                 cma_dev = &cma_dev_array[i];\r
321                 if (cma_dev->guid == guid) {\r
322                         id_priv->cma_dev = cma_dev;\r
323                         id_priv->id.verbs = cma_dev->verbs;\r
324                         return 0;\r
325                 }\r
326         }\r
327         return -1;\r
328 }\r
329 \r
330 static int ucma_query_connect(struct rdma_cm_id *id, struct rdma_conn_param *param)\r
331 {\r
332         struct cma_id_private *id_priv;\r
333         WV_CONNECT_ATTRIBUTES attr;\r
334         HRESULT hr;\r
335 \r
336         id_priv = CONTAINING_RECORD(id, struct cma_id_private, id);\r
337         hr = id->ep.connect->Query(&attr);\r
338         if (FAILED(hr)) {\r
339                 return hr;\r
340         }\r
341 \r
342         RtlCopyMemory(&id->route.addr.src_addr, &attr.LocalAddress,\r
343                                   sizeof attr.LocalAddress);\r
344         RtlCopyMemory(&id->route.addr.dst_addr, &attr.PeerAddress,\r
345                                   sizeof attr.PeerAddress);\r
346 \r
347         if (param != NULL) {\r
348                 RtlCopyMemory((void *) param->private_data, attr.Param.Data,\r
349                                           attr.Param.DataLength);\r
350                 param->private_data_len = (uint8_t) attr.Param.DataLength;\r
351                 param->responder_resources = (uint8_t) attr.Param.ResponderResources;\r
352                 param->initiator_depth = (uint8_t) attr.Param.InitiatorDepth;\r
353                 param->flow_control = 1;\r
354                 param->retry_count = attr.Param.RetryCount;\r
355                 param->rnr_retry_count = attr.Param.RnrRetryCount;\r
356         }\r
357 \r
358         if (id_priv->cma_dev == NULL && attr.Device.DeviceGuid != 0) {\r
359                 hr = ucma_get_device(id_priv, attr.Device.DeviceGuid);\r
360                 if (FAILED(hr)) {\r
361                         return hr;\r
362                 }\r
363 \r
364                 id->route.addr.addr.ibaddr.pkey = attr.Device.Pkey;\r
365                 id_priv->id.port_num = attr.Device.PortNumber;\r
366         }\r
367 \r
368         return 0;\r
369 }\r
370 \r
371 static int ucma_query_datagram(struct rdma_cm_id *id, struct rdma_ud_param *param)\r
372 {\r
373         struct cma_id_private *id_priv;\r
374         WV_DATAGRAM_ATTRIBUTES attr;\r
375         HRESULT hr;\r
376 \r
377         id_priv = CONTAINING_RECORD(id, struct cma_id_private, id);\r
378         hr = id->ep.datagram->Query(&attr);\r
379         if (FAILED(hr)) {\r
380                 return hr;\r
381         }\r
382 \r
383         RtlCopyMemory(&id->route.addr.src_addr, &attr.LocalAddress,\r
384                                   sizeof attr.LocalAddress);\r
385         RtlCopyMemory(&id->route.addr.dst_addr, &attr.PeerAddress,\r
386                                   sizeof attr.PeerAddress);\r
387 \r
388         if (param != NULL) {\r
389                 RtlCopyMemory((void *) param->private_data, attr.Param.Data,\r
390                                           attr.Param.DataLength);\r
391                 param->private_data_len = (uint8_t) attr.Param.DataLength;\r
392                 // ucma_convert_av(&attr.Param.AddressVector, param->ah_attr)\r
393                 param->qp_num = attr.Param.Qpn;\r
394                 param->qkey = attr.Param.Qkey;\r
395         }\r
396 \r
397         if (id_priv->cma_dev == NULL && attr.Device.DeviceGuid != 0) {\r
398                 hr = ucma_get_device(id_priv, attr.Device.DeviceGuid);\r
399                 if (FAILED(hr))\r
400                         return hr;\r
401                 id->route.addr.addr.ibaddr.pkey = attr.Device.Pkey;\r
402                 id_priv->id.port_num = attr.Device.PortNumber;\r
403         }\r
404         return 0;\r
405 }\r
406 \r
407 __declspec(dllexport)\r
408 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)\r
409 {\r
410         HRESULT hr;\r
411 \r
412         if (id->ps == RDMA_PS_TCP) {\r
413                 hr = id->ep.connect->BindAddress(addr);\r
414                 if (SUCCEEDED(hr)) {\r
415                         hr = ucma_query_connect(id, NULL);\r
416                 }\r
417         } else {\r
418                 hr = id->ep.datagram->BindAddress(addr);\r
419                 if (SUCCEEDED(hr)) {\r
420                         hr = ucma_query_datagram(id, NULL);\r
421                 }\r
422         }\r
423 \r
424         return hr;\r
425 }\r
426 \r
427 __declspec(dllexport)\r
428 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,\r
429                                           struct sockaddr *dst_addr, int timeout_ms)\r
430 {\r
431         struct cma_id_private *id_priv;\r
432         WV_SOCKADDR addr;\r
433         SOCKET s;\r
434         DWORD size;\r
435         HRESULT hr;\r
436 \r
437         if (src_addr == NULL) {\r
438                 if (id->ps == RDMA_PS_TCP) {\r
439                         s = socket(dst_addr->sa_family, SOCK_STREAM, IPPROTO_TCP);\r
440                 } else {\r
441                         s = socket(dst_addr->sa_family, SOCK_DGRAM, IPPROTO_UDP);\r
442                 }\r
443                 if (s == INVALID_SOCKET) {\r
444                         return WSAGetLastError();\r
445                 }\r
446 \r
447                 hr = WSAIoctl(s, SIO_ROUTING_INTERFACE_QUERY, dst_addr, ucma_addrlen(dst_addr),\r
448                                           &addr, sizeof addr, &size, NULL, NULL);\r
449                 closesocket(s);\r
450                 if (FAILED(hr)) {\r
451                         return WSAGetLastError();\r
452                 }\r
453                 src_addr = &addr.Sa;\r
454         }\r
455 \r
456         hr = rdma_bind_addr(id, src_addr);\r
457         if (FAILED(hr)) {\r
458                 return hr;\r
459         }\r
460 \r
461         RtlCopyMemory(&id->route.addr.dst_addr, dst_addr, ucma_addrlen(dst_addr));\r
462         id_priv = CONTAINING_RECORD(id, struct cma_id_private, id);\r
463         id_priv->state = cma_addr_resolve;\r
464 \r
465         CompEntryPost(&id->comp_entry);\r
466         return 0;\r
467 }\r
468 \r
469 __declspec(dllexport)\r
470 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)\r
471 {\r
472         struct cma_id_private *id_priv;\r
473         IBAT_PATH_BLOB path;\r
474         HRESULT hr;\r
475 \r
476         hr = IBAT::Resolve(&id->route.addr.src_addr, &id->route.addr.dst_addr, &path);\r
477         if (FAILED(hr)) {\r
478                 return hr;\r
479         }\r
480 \r
481         hr = (id->ps == RDMA_PS_TCP) ?\r
482                  id->ep.connect->Modify(WV_EP_OPTION_ROUTE, &path, sizeof path) :\r
483                  id->ep.datagram->Modify(WV_EP_OPTION_ROUTE, &path, sizeof path);\r
484         if (FAILED(hr)) {\r
485                 return hr;\r
486         }\r
487 \r
488         id_priv = CONTAINING_RECORD(id, struct cma_id_private, id);\r
489         id_priv->state = cma_route_resolve;\r
490 \r
491         CompEntryPost(&id->comp_entry);\r
492         return 0;\r
493 }\r
494 \r
495 static int ucma_modify_qp_init(struct cma_id_private *id_priv, struct ibv_qp *qp)\r
496 {\r
497         struct ibv_qp_attr qp_attr;\r
498         UINT16 index;\r
499         HRESULT hr;\r
500 \r
501         RtlZeroMemory(&qp_attr, sizeof qp_attr);\r
502         qp_attr.qp_state = IBV_QPS_INIT;\r
503         qp_attr.port_num = id_priv->id.port_num;\r
504         hr = qp->context->cmd_if->FindPkey(id_priv->id.port_num,\r
505                                                                            id_priv->id.route.addr.addr.ibaddr.pkey,\r
506                                                                            &index);\r
507         if (FAILED(hr)) {\r
508                 return hr;\r
509         }\r
510 \r
511         qp_attr.pkey_index = index;\r
512         return ibv_modify_qp(qp, &qp_attr, (enum ibv_qp_attr_mask)\r
513                                                  (IBV_QP_STATE | IBV_QP_PKEY_INDEX | IBV_QP_PORT));\r
514 }\r
515 \r
516 static int ucma_init_ud_qp(struct cma_id_private *id_priv, struct ibv_qp *qp)\r
517 {\r
518         struct ibv_qp_attr qp_attr;\r
519         int qp_attr_mask, ret;\r
520 \r
521         ret = ucma_modify_qp_init(id_priv, qp);\r
522         if (ret) {\r
523                 return ret;\r
524         }\r
525 \r
526         qp_attr.qp_state = IBV_QPS_RTR;\r
527         ret = ibv_modify_qp(qp, &qp_attr, IBV_QP_STATE);\r
528         if (ret) {\r
529                 return ret;\r
530         }\r
531 \r
532         qp_attr.qp_state = IBV_QPS_RTS;\r
533         qp_attr.sq_psn = 0;\r
534         return ibv_modify_qp(qp, &qp_attr, (enum ibv_qp_attr_mask)\r
535                                                  (IBV_QP_STATE | IBV_QP_SQ_PSN));\r
536 }\r
537 \r
538 __declspec(dllexport)\r
539 int rdma_create_qp(struct rdma_cm_id *id, struct ibv_pd *pd,\r
540                                    struct ibv_qp_init_attr *qp_init_attr)\r
541 {\r
542         struct cma_id_private *id_priv;\r
543         struct ibv_qp *qp;\r
544         int ret;\r
545 \r
546         id_priv = CONTAINING_RECORD(id, struct cma_id_private, id);\r
547         if (id->verbs != pd->context) {\r
548                 return -1;\r
549         }\r
550 \r
551         qp = ibv_create_qp(pd, qp_init_attr);\r
552         if (!qp) {\r
553                 return -1;\r
554         }\r
555 \r
556         if (id->ps == RDMA_PS_TCP) {\r
557                 ret = ucma_modify_qp_init(id_priv, qp);\r
558         } else {\r
559                 ret = ucma_init_ud_qp(id_priv, qp);\r
560         }\r
561         if (ret) {\r
562                 goto err;\r
563         }\r
564 \r
565         id->qp = qp;\r
566         return 0;\r
567 err:\r
568         ibv_destroy_qp(qp);\r
569         return ret;\r
570 }\r
571 \r
572 __declspec(dllexport)\r
573 void rdma_destroy_qp(struct rdma_cm_id *id)\r
574 {\r
575         ibv_destroy_qp(id->qp);\r
576 }\r
577 \r
578 static int ucma_valid_param(struct cma_id_private *id_priv,\r
579                                                         struct rdma_conn_param *conn_param)\r
580 {\r
581         if (id_priv->id.ps != RDMA_PS_TCP) {\r
582                 return 0;\r
583         }\r
584 \r
585         if ((conn_param->responder_resources > id_priv->cma_dev->max_responder_resources) ||\r
586                 (conn_param->initiator_depth > id_priv->cma_dev->max_initiator_depth)) {\r
587                 return -1;\r
588         }\r
589 \r
590         return 0;\r
591 }\r
592 \r
593 __declspec(dllexport)\r
594 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)\r
595 {\r
596         struct cma_id_private *id_priv;\r
597         WV_CONNECT_PARAM attr;\r
598         HRESULT hr;\r
599         \r
600         id_priv = CONTAINING_RECORD(id, struct cma_id_private, id);\r
601         hr = ucma_valid_param(id_priv, conn_param);\r
602         if (FAILED(hr)) {\r
603                 return hr;\r
604         }\r
605 \r
606         RtlZeroMemory(&attr, sizeof attr);\r
607         attr.ResponderResources = conn_param->responder_resources;\r
608         attr.InitiatorDepth = conn_param->initiator_depth;\r
609         attr.RetryCount = conn_param->retry_count;\r
610         attr.RnrRetryCount = conn_param->rnr_retry_count;\r
611         if ((attr.DataLength = conn_param->private_data_len)) {\r
612                 RtlCopyMemory(attr.Data, conn_param->private_data, attr.DataLength);\r
613         }\r
614 \r
615         id_priv->state = cma_active_connect;\r
616         hr = id->ep.connect->Connect(id->qp->conn_handle, &id->route.addr.dst_addr,\r
617                                                                  &attr, &id->comp_entry.Overlap);\r
618         if (FAILED(hr) && hr != WV_IO_PENDING) {\r
619                 id_priv->state = cma_route_resolve;\r
620                 return hr;\r
621         }\r
622 \r
623         return 0;\r
624 }\r
625 \r
626 static int ucma_get_request(struct cma_id_private *listen, int index)\r
627 {\r
628         struct cma_id_private *id_priv;\r
629         HRESULT hr;\r
630 \r
631         hr = rdma_create_id(listen->id.channel, &listen->req_list[index],\r
632                                                 listen, listen->id.ps);\r
633         if (FAILED(hr)) {\r
634                 return hr;\r
635         }\r
636 \r
637         id_priv = CONTAINING_RECORD(listen->req_list[index], struct cma_id_private, id);\r
638         id_priv->index = index;\r
639         id_priv->state = cma_get_request;\r
640 \r
641         if (listen->id.ps == RDMA_PS_TCP) {\r
642                 hr = listen->id.ep.connect->GetRequest(id_priv->id.ep.connect,\r
643                                                                                            &id_priv->id.comp_entry.Overlap);\r
644         } else {\r
645                 hr = listen->id.ep.datagram->GetRequest(id_priv->id.ep.datagram,\r
646                                                                                                 &id_priv->id.comp_entry.Overlap);\r
647         }\r
648         if (FAILED(hr) && hr != WV_IO_PENDING) {\r
649                 return hr;\r
650         }\r
651 \r
652         return 0;\r
653 }\r
654 \r
655 __declspec(dllexport)\r
656 int rdma_listen(struct rdma_cm_id *id, int backlog)\r
657 {\r
658         struct cma_id_private *id_priv, *req_id;\r
659         HRESULT hr;\r
660         int i;\r
661 \r
662         if (backlog <= 0) {\r
663                 backlog = CMA_DEFAULT_BACKLOG;\r
664         }\r
665 \r
666         id_priv = CONTAINING_RECORD(id, struct cma_id_private, id);\r
667         id_priv->req_list = new struct rdma_cm_id*[backlog];\r
668         if (id_priv->req_list == NULL) {\r
669                 return -1;\r
670         }\r
671 \r
672         RtlZeroMemory(id_priv->req_list, sizeof(struct rdma_cm_id *) * backlog);\r
673         id_priv->backlog = backlog;\r
674 \r
675         id_priv->state = cma_listening;\r
676         hr = (id->ps == RDMA_PS_TCP) ?\r
677                  id->ep.connect->Listen(backlog) : id->ep.datagram->Listen(backlog);\r
678         if (FAILED(hr)) {\r
679                 return hr;\r
680         }\r
681 \r
682         for (i = 0; i < backlog; i++) {\r
683                 hr = ucma_get_request(id_priv, i);\r
684                 if (FAILED(hr)) {\r
685                         return hr;\r
686                 }\r
687         }\r
688 \r
689         return 0;\r
690 }\r
691 \r
692 __declspec(dllexport)\r
693 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)\r
694 {\r
695         struct cma_id_private *id_priv;\r
696         WV_CONNECT_PARAM attr;\r
697         HRESULT hr;\r
698 \r
699         id_priv = CONTAINING_RECORD(id, struct cma_id_private, id);\r
700         hr = ucma_valid_param(id_priv, conn_param);\r
701         if (FAILED(hr)) {\r
702                 return hr;\r
703         }\r
704 \r
705         RtlZeroMemory(&attr, sizeof attr);\r
706         attr.ResponderResources = conn_param->responder_resources;\r
707         attr.InitiatorDepth = conn_param->initiator_depth;\r
708         attr.RetryCount = conn_param->retry_count;\r
709         attr.RnrRetryCount = conn_param->rnr_retry_count;\r
710         if ((attr.DataLength = conn_param->private_data_len)) {\r
711                 RtlCopyMemory(attr.Data, conn_param->private_data, attr.DataLength);\r
712         }\r
713 \r
714         id_priv->state = cma_accepting;\r
715         hr = id->ep.connect->Accept(id->qp->conn_handle, &attr,\r
716                                                                 &id->comp_entry.Overlap);\r
717         if (FAILED(hr) && hr != WV_IO_PENDING) {\r
718                 id_priv->state = cma_disconnected;\r
719                 return hr;\r
720         }\r
721 \r
722         return 0;\r
723 }\r
724 \r
725 __declspec(dllexport)\r
726 int rdma_reject(struct rdma_cm_id *id, const void *private_data,\r
727                                 uint8_t private_data_len)\r
728 {\r
729         struct cma_id_private *id_priv;\r
730         HRESULT hr;\r
731 \r
732         id_priv = CONTAINING_RECORD(id, struct cma_id_private, id);\r
733         id_priv->state = cma_disconnected;\r
734         hr = id->ep.connect->Reject(private_data, private_data_len);\r
735         if (FAILED(hr)) {\r
736                 return hr;\r
737         }\r
738         return 0;\r
739 }\r
740 \r
741 __declspec(dllexport)\r
742 int rdma_notify(struct rdma_cm_id *id, enum ibv_event_type event)\r
743 {\r
744         return 0;\r
745 }\r
746 \r
747 __declspec(dllexport)\r
748 int rdma_disconnect(struct rdma_cm_id *id)\r
749 {\r
750         struct cma_id_private *id_priv;\r
751         HRESULT hr;\r
752 \r
753         id_priv = CONTAINING_RECORD(id, struct cma_id_private, id);\r
754         if (id_priv->state == cma_connected) {\r
755                 id_priv->state = cma_active_disconnect;\r
756         } else {\r
757                 id_priv->state = cma_disconnected;\r
758         }\r
759         hr = id->ep.connect->Disconnect();\r
760         if (FAILED(hr)) {\r
761                 return hr;\r
762         }\r
763 \r
764         return 0;\r
765 }\r
766 \r
767 __declspec(dllexport)\r
768 int rdma_ack_cm_event(struct rdma_cm_event *event)\r
769 {\r
770         struct cma_event *evt;\r
771 \r
772         evt = CONTAINING_RECORD(event, struct cma_event, event);\r
773         delete evt;\r
774         return 0;\r
775 }\r
776 \r
777 static int ucma_process_conn_req(struct cma_event *event)\r
778 {\r
779         struct cma_id_private *listen;\r
780         struct cma_event_channel *chan;\r
781 \r
782         listen = (struct cma_id_private *) event->id_priv->id.context;\r
783         ucma_get_request(listen, event->id_priv->index);\r
784 \r
785         if (SUCCEEDED(event->event.status)) {\r
786                 event->event.status = ucma_query_connect(&event->id_priv->id,\r
787                                                                                                  &event->event.param.conn);\r
788         }\r
789 \r
790         if (SUCCEEDED(event->event.status)) {\r
791                 event->event.event = RDMA_CM_EVENT_CONNECT_REQUEST;\r
792                 event->id_priv->state = cma_passive_connect;\r
793         } else {\r
794                 rdma_destroy_id(&event->id_priv->id);\r
795         }\r
796 \r
797         return event->event.status;\r
798 }\r
799 \r
800 static int ucma_process_conn_resp(struct cma_event *event)\r
801 {\r
802         struct rdma_cm_id *id;\r
803         WV_CONNECT_PARAM attr;\r
804         HRESULT hr;\r
805 \r
806         if (FAILED(event->event.status)) {\r
807                 goto err;\r
808         }\r
809 \r
810         RtlZeroMemory(&attr, sizeof(attr));\r
811         event->id_priv->state = cma_accepting;\r
812 \r
813         id = &event->id_priv->id;\r
814         hr = id->ep.connect->Accept(id->qp->conn_handle, &attr,\r
815                                                                 &id->comp_entry.Overlap);\r
816         if (FAILED(hr) && hr != WV_IO_PENDING) {\r
817                 event->event.status = hr;\r
818                 goto err;\r
819         }\r
820 \r
821         return WV_IO_PENDING;\r
822 \r
823 err:\r
824         event->event.event = (event->event.status == WV_REJECTED) ?\r
825                                                  RDMA_CM_EVENT_REJECTED :\r
826                                                  RDMA_CM_EVENT_CONNECT_ERROR;\r
827         event->id_priv->state = cma_disconnected;\r
828         return 0;\r
829 }\r
830 \r
831 static void ucma_process_establish(struct cma_event *event)\r
832 {\r
833         struct cma_id_private *id_priv = event->id_priv;\r
834 \r
835         if (SUCCEEDED(event->event.status)) {\r
836                 event->event.status = ucma_query_connect(&id_priv->id,\r
837                                                                                                  &event->event.param.conn);\r
838         }\r
839 \r
840         if (SUCCEEDED(event->event.status)) {\r
841                 event->event.event = RDMA_CM_EVENT_ESTABLISHED;\r
842 \r
843                 id_priv->state = cma_connected;\r
844                 id_priv->id.ep.connect->NotifyDisconnect(&id_priv->id.comp_entry.Overlap);\r
845         } else {\r
846                 event->event.event = RDMA_CM_EVENT_CONNECT_ERROR;\r
847                 event->id_priv->state = cma_disconnected;\r
848         }\r
849 }\r
850 \r
851 static int ucma_process_event(struct cma_event *event)\r
852 {\r
853         WV_CONNECT_ATTRIBUTES attr;\r
854         HRESULT hr = 0;\r
855 \r
856         switch (event->id_priv->state) {\r
857         case cma_get_request:\r
858                 hr = ucma_process_conn_req(event);\r
859                 break;\r
860         case cma_addr_resolve:\r
861                 event->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;\r
862                 break;\r
863         case cma_route_resolve:\r
864                 event->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;\r
865                 break;\r
866         case cma_active_connect:\r
867                 hr = ucma_process_conn_resp(event);\r
868                 break;\r
869         case cma_accepting:\r
870                 ucma_process_establish(event);\r
871                 break;\r
872         case cma_connected:\r
873                 event->event.event = RDMA_CM_EVENT_DISCONNECTED;\r
874                 event->id_priv->state = cma_passive_disconnect;\r
875                 break;\r
876         case cma_active_disconnect:\r
877                 event->event.event = RDMA_CM_EVENT_DISCONNECTED;\r
878                 event->id_priv->state = cma_disconnected;\r
879                 break;\r
880         default:\r
881                 return -1;\r
882         }\r
883 \r
884         return hr;\r
885 }\r
886 \r
887 __declspec(dllexport)\r
888 int rdma_get_cm_event(struct rdma_event_channel *channel,\r
889                                           struct rdma_cm_event **event)\r
890 {\r
891         struct cma_event *evt;\r
892         struct rdma_cm_id *id;\r
893         COMP_ENTRY *entry;\r
894         DWORD bytes, ret;\r
895 \r
896         evt = new struct cma_event;\r
897         if (evt == NULL) {\r
898                 return -1;\r
899         }\r
900 \r
901         do {\r
902                 RtlZeroMemory(evt, sizeof(struct cma_event));\r
903 \r
904                 ret = CompChannelPoll(&channel->channel, &entry);\r
905                 if (ret) {\r
906                         return ret;\r
907                 }\r
908 \r
909                 id = CONTAINING_RECORD(entry, struct rdma_cm_id, comp_entry);\r
910                 evt->id_priv = CONTAINING_RECORD(id, struct cma_id_private, id);\r
911                 evt->event.id = id;\r
912                 evt->event.param.conn.private_data = evt->private_data;\r
913                 evt->event.status = id->ep.connect->\r
914                                                         GetOverlappedResult(&entry->Overlap, &bytes, FALSE);\r
915 \r
916                 ret = ucma_process_event(evt);\r
917         } while (ret);\r
918         \r
919         *event = &evt->event;\r
920         return 0;\r
921 }\r
922 \r
923 \r
924 __declspec(dllexport)\r
925 int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,\r
926                                                 void *context)\r
927 {\r
928         return WV_NOT_SUPPORTED;\r
929 }\r
930 \r
931 __declspec(dllexport)\r
932 int rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)\r
933 {\r
934         return WV_NOT_SUPPORTED;\r
935 }\r
936 \r
937 __declspec(dllexport)\r
938 const char *rdma_event_str(enum rdma_cm_event_type event)\r
939 {\r
940         switch (event) {\r
941         case RDMA_CM_EVENT_ADDR_RESOLVED:\r
942                 return "RDMA_CM_EVENT_ADDR_RESOLVED";\r
943         case RDMA_CM_EVENT_ADDR_ERROR:\r
944                 return "RDMA_CM_EVENT_ADDR_ERROR";\r
945         case RDMA_CM_EVENT_ROUTE_RESOLVED:\r
946                 return "RDMA_CM_EVENT_ROUTE_RESOLVED";\r
947         case RDMA_CM_EVENT_ROUTE_ERROR:\r
948                 return "RDMA_CM_EVENT_ROUTE_ERROR";\r
949         case RDMA_CM_EVENT_CONNECT_REQUEST:\r
950                 return "RDMA_CM_EVENT_CONNECT_REQUEST";\r
951         case RDMA_CM_EVENT_CONNECT_RESPONSE:\r
952                 return "RDMA_CM_EVENT_CONNECT_RESPONSE";\r
953         case RDMA_CM_EVENT_CONNECT_ERROR:\r
954                 return "RDMA_CM_EVENT_CONNECT_ERROR";\r
955         case RDMA_CM_EVENT_UNREACHABLE:\r
956                 return "RDMA_CM_EVENT_UNREACHABLE";\r
957         case RDMA_CM_EVENT_REJECTED:\r
958                 return "RDMA_CM_EVENT_REJECTED";\r
959         case RDMA_CM_EVENT_ESTABLISHED:\r
960                 return "RDMA_CM_EVENT_ESTABLISHED";\r
961         case RDMA_CM_EVENT_DISCONNECTED:\r
962                 return "RDMA_CM_EVENT_DISCONNECTED";\r
963         case RDMA_CM_EVENT_DEVICE_REMOVAL:\r
964                 return "RDMA_CM_EVENT_DEVICE_REMOVAL";\r
965         case RDMA_CM_EVENT_MULTICAST_JOIN:\r
966                 return "RDMA_CM_EVENT_MULTICAST_JOIN";\r
967         case RDMA_CM_EVENT_MULTICAST_ERROR:\r
968                 return "RDMA_CM_EVENT_MULTICAST_ERROR";\r
969         case RDMA_CM_EVENT_ADDR_CHANGE:\r
970                 return "RDMA_CM_EVENT_ADDR_CHANGE";\r
971         case RDMA_CM_EVENT_TIMEWAIT_EXIT:\r
972                 return "RDMA_CM_EVENT_TIMEWAIT_EXIT";\r
973         default:\r
974                 return "UNKNOWN EVENT";\r
975         }\r
976 }\r
977 \r
978 __declspec(dllexport)\r
979 int rdma_set_option(struct rdma_cm_id *id, int level, int optname,\r
980                                         void *optval, size_t optlen)\r
981 {\r
982         return WV_NOT_SUPPORTED;\r
983 }\r
984 \r
985 __declspec(dllexport)\r
986 int rdma_migrate_id(struct rdma_cm_id *id, struct rdma_event_channel *channel)\r
987 {\r
988         id->channel = channel;\r
989         return 0;\r
990 }\r