Now compiles against vanilla 2.6.18 kernel headers.
srpt/src/ib_srpt.c
/*
 * Copyright (c) 2006 - 2009 Mellanox Technology Inc.  All rights reserved.
 * Copyright (C) 2008 Vladislav Bolkhovitin <vst@vlnb.net>
 * Copyright (C) 2008 - 2009 Bart Van Assche <bart.vanassche@gmail.com>
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/kthread.h>

#include <asm/atomic.h>

#include "ib_srpt.h"

/* Name of this kernel module. */
#define DRV_NAME                "ib_srpt"
/* Prefix for printk() kernel messages. */
#define PFX                     DRV_NAME ": "
#define DRV_VERSION             "1.0.1"
#define DRV_RELDATE             "July 10, 2008"

#define MELLANOX_SRPT_ID_STRING "Mellanox OFED SRP target"

MODULE_AUTHOR("Vu Pham");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol target "
                   "v" DRV_VERSION " (" DRV_RELDATE ")");
MODULE_LICENSE("Dual BSD/GPL");

struct srpt_thread {
        /* Protects thread_ioctx_list. */
        spinlock_t thread_lock;
        /* I/O contexts to be processed by the kernel thread. */
        struct list_head thread_ioctx_list;
        /* SRPT kernel thread. */
        struct task_struct *thread;
};

static u64 mellanox_ioc_guid;
/* List of srpt_device structures. */
static struct list_head srpt_devices;
static int thread;
static struct srpt_thread srpt_thread;
static DECLARE_WAIT_QUEUE_HEAD(ioctx_list_waitQ);

module_param(thread, int, 0444);
MODULE_PARM_DESC(thread,
                 "Execute ioctx in thread context. Default 0, i.e. soft IRQ, "
                 "where possible");

static void srpt_add_one(struct ib_device *device);
static void srpt_remove_one(struct ib_device *device);
static int srpt_disconnect_channel(struct srpt_rdma_ch *ch, int dreq);
static void srpt_unregister_mad_agent(struct srpt_device *sdev);

static struct ib_client srpt_client = {
        .name = DRV_NAME,
        .add = srpt_add_one,
        .remove = srpt_remove_one
};

/*
 * Callback function called by the InfiniBand core when an asynchronous IB
 * event occurs. This callback may occur in interrupt context. See also
 * section 11.5.2, Set Asynchronous Event Handler in the InfiniBand
 * Architecture Specification.
 */
static void srpt_event_handler(struct ib_event_handler *handler,
                               struct ib_event *event)
{
        struct srpt_device *sdev =
            ib_get_client_data(event->device, &srpt_client);
        struct srpt_port *sport;

        if (!sdev || sdev->device != event->device)
                return;

        printk(KERN_WARNING PFX "ASYNC event= %d on device= %s\n",
                event->event, sdev->device->name);

        switch (event->event) {
        case IB_EVENT_PORT_ERR:
                if (event->element.port_num <= sdev->device->phys_port_cnt) {
                        sport = &sdev->port[event->element.port_num - 1];
                        sport->lid = 0;
                        sport->sm_lid = 0;
                }
                break;
        case IB_EVENT_PORT_ACTIVE:
        case IB_EVENT_LID_CHANGE:
        case IB_EVENT_PKEY_CHANGE:
        case IB_EVENT_SM_CHANGE:
        case IB_EVENT_CLIENT_REREGISTER:
                if (event->element.port_num <= sdev->device->phys_port_cnt) {
                        sport = &sdev->port[event->element.port_num - 1];
                        if (!sport->lid && !sport->sm_lid)
                                schedule_work(&sport->work);
                }
                break;
        default:
                break;
        }
}

/*
 * Callback function called by the InfiniBand core for SRQ (shared receive
 * queue) events.
 */
static void srpt_srq_event(struct ib_event *event, void *ctx)
{
        printk(KERN_WARNING PFX "SRQ event %d\n", event->event);
}

/*
 * Callback function called by the InfiniBand core for QP (queue pair) events.
 */
static void srpt_qp_event(struct ib_event *event, void *ctx)
{
        struct srpt_rdma_ch *ch = ctx;

        printk(KERN_WARNING PFX
               "QP event %d on cm_id=%p sess_name=%s state=%d\n",
               event->event, ch->cm_id, ch->sess_name, ch->state);

        switch (event->event) {
        case IB_EVENT_COMM_EST:
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20) || defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
                ib_cm_notify(ch->cm_id, event->event);
#else
                /* Vanilla 2.6.19 kernel (or before) without OFED. */
                printk(KERN_ERR PFX "ib_cm_notify() is not supported on"
                        " vanilla kernels before 2.6.20\n");
#endif
                break;
        case IB_EVENT_QP_LAST_WQE_REACHED:
                if (ch->state == RDMA_CHANNEL_LIVE) {
                        printk(KERN_WARNING PFX
                               "Schedule CM_DISCONNECT_WORK\n");
                        srpt_disconnect_channel(ch, 1);
                }
                break;
        default:
                break;
        }
}

/*
 * Helper function for filling in an InfiniBand IOUnitInfo structure. Copies
 * the lowest four bits of 'value' into element 'slot' of the array of
 * four-bit elements 'c_list' (controller list). The index 'slot' is
 * one-based.
 *
 * @pre 1 <= slot && 0 <= value && value < 16
 */
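/*
 * Layout implemented below: slot 1 occupies the high nibble of c_list[0],
 * slot 2 the low nibble of c_list[0], slot 3 the high nibble of c_list[1],
 * and so on.
 */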
static void srpt_set_ioc(u8 *c_list, u32 slot, u8 value)
{
        u16 id;
        u8 tmp;

        id = (slot - 1) / 2;
        if (slot & 0x1) {
                tmp = c_list[id] & 0xf;
                c_list[id] = (value << 4) | tmp;
        } else {
                tmp = c_list[id] & 0xf0;
                c_list[id] = (value & 0xf) | tmp;
        }
}

/*
 * Write InfiniBand ClassPortInfo to mad. See also section 16.3.3.1
 * ClassPortInfo in the InfiniBand Architecture Specification.
 */
static void srpt_get_class_port_info(struct ib_dm_mad *mad)
{
        struct ib_class_port_info *cif;

        cif = (struct ib_class_port_info *)mad->data;
        memset(cif, 0, sizeof *cif);
        cif->base_version = 1;
        cif->class_version = 1;
        cif->resp_time_value = 20;

        mad->mad_hdr.status = 0;
}

/*
 * Write IOUnitInfo to mad. See also section 16.3.3.3 IOUnitInfo in the
 * InfiniBand Architecture Specification. See also section B.7,
 * table B.6 in the T10 SRP r16a document.
 */
static void srpt_get_iou(struct ib_dm_mad *mad)
{
        struct ib_dm_iou_info *ioui;
        u8 slot;
        int i;

        ioui = (struct ib_dm_iou_info *)mad->data;
        ioui->change_id = 1;
        ioui->max_controllers = 16;

        /* set present for slot 1 and empty for the rest */
        srpt_set_ioc(ioui->controller_list, 1, 1);
        for (i = 1, slot = 2; i < 16; i++, slot++)
                srpt_set_ioc(ioui->controller_list, slot, 0);

        mad->mad_hdr.status = 0;
}

/*
 * Write IOControllerProfile to mad for I/O controller (sdev, slot). See also
 * section 16.3.3.4 IOControllerProfile in the InfiniBand Architecture
 * Specification. See also section B.7, table B.7 in the T10 SRP r16a
 * document.
 */
static void srpt_get_ioc(struct srpt_device *sdev, u32 slot,
                         struct ib_dm_mad *mad)
{
        struct ib_dm_ioc_profile *iocp;

        iocp = (struct ib_dm_ioc_profile *)mad->data;

        if (!slot || slot > 16) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
                return;
        }

        if (slot > 2) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
                return;
        }

        memset(iocp, 0, sizeof *iocp);
        strcpy(iocp->id_string, MELLANOX_SRPT_ID_STRING);
        iocp->guid = cpu_to_be64(mellanox_ioc_guid);
        iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
        iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id);
        iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver);
        iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
        iocp->subsys_device_id = 0x0;
        iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
        iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
        iocp->protocol = cpu_to_be16(SRP_PROTOCOL);
        iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION);
        iocp->send_queue_depth = cpu_to_be16(SRPT_SRQ_SIZE);
        iocp->rdma_read_depth = 4;
        iocp->send_size = cpu_to_be32(MAX_MESSAGE_SIZE);
        iocp->rdma_size = cpu_to_be32(MAX_RDMA_SIZE);
        iocp->num_svc_entries = 1;
        iocp->op_cap_mask = SRP_SEND_TO_IOC | SRP_SEND_FROM_IOC |
            SRP_RDMA_READ_FROM_IOC | SRP_RDMA_WRITE_FROM_IOC;

        mad->mad_hdr.status = 0;
}

/*
 * Device management: write ServiceEntries to mad for the given slot. See also
 * section 16.3.3.5 ServiceEntries in the InfiniBand Architecture
 * Specification. See also section B.7, table B.8 in the T10 SRP r16a document.
 */
static void srpt_get_svc_entries(u16 slot, u8 hi, u8 lo, struct ib_dm_mad *mad)
{
        struct ib_dm_svc_entries *svc_entries;

        if (!slot || slot > 16) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
                return;
        }

        if (slot > 2 || lo > hi || hi > 1) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
                return;
        }

        svc_entries = (struct ib_dm_svc_entries *)mad->data;
        memset(svc_entries, 0, sizeof *svc_entries);
        svc_entries->service_entries[0].id = cpu_to_be64(mellanox_ioc_guid);
        sprintf(svc_entries->service_entries[0].name, "%s%016llx",
                SRP_SERVICE_NAME_PREFIX, (unsigned long long)mellanox_ioc_guid);

        mad->mad_hdr.status = 0;
}

/*
 * Actual processing of MAD *rq_mad, received through source port *sp
 * (MAD = InfiniBand management datagram). The response to be sent back is
 * written to *rsp_mad.
 */
static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
                                 struct ib_dm_mad *rsp_mad)
{
        u16 attr_id;
        u32 slot;
        u8 hi, lo;

        attr_id = be16_to_cpu(rq_mad->mad_hdr.attr_id);
        switch (attr_id) {
        case DM_ATTR_CLASS_PORT_INFO:
                srpt_get_class_port_info(rsp_mad);
                break;
        case DM_ATTR_IOU_INFO:
                srpt_get_iou(rsp_mad);
                break;
        case DM_ATTR_IOC_PROFILE:
                slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
                srpt_get_ioc(sp->sdev, slot, rsp_mad);
                break;
        case DM_ATTR_SVC_ENTRIES:
                slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
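                /*
                 * The attribute modifier, as decoded below, packs the
                 * requested service entry range: bits 31:16 hold the slot,
                 * bits 15:8 the upper entry index 'hi' and bits 7:0 the
                 * lower entry index 'lo'.
                 */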
                hi = (u8) ((slot >> 8) & 0xff);
                lo = (u8) (slot & 0xff);
                slot = (u16) ((slot >> 16) & 0xffff);
                srpt_get_svc_entries(slot, hi, lo, rsp_mad);
                break;
        default:
                rsp_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
                break;
        }
}

/*
 * Callback function that is called by the InfiniBand core after transmission of
 * a MAD. (MAD = management datagram; AH = address handle.)
 */
static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
                                  struct ib_mad_send_wc *mad_wc)
{
        ib_destroy_ah(mad_wc->send_buf->ah);
        ib_free_send_mad(mad_wc->send_buf);
}

/*
 * Callback function that is called by the InfiniBand core after reception of
 * a MAD (management datagram).
 */
static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
                                  struct ib_mad_recv_wc *mad_wc)
{
        struct srpt_port *sport = (struct srpt_port *)mad_agent->context;
        struct ib_ah *ah;
        struct ib_mad_send_buf *rsp;
        struct ib_dm_mad *dm_mad;

        if (!mad_wc || !mad_wc->recv_buf.mad)
                return;

        ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc,
                                  mad_wc->recv_buf.grh, mad_agent->port_num);
        if (IS_ERR(ah))
                goto err;

        BUILD_BUG_ON(offsetof(struct ib_dm_mad, data) != IB_MGMT_DEVICE_HDR);

        rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp,
                                 mad_wc->wc->pkey_index, 0,
                                 IB_MGMT_DEVICE_HDR, IB_MGMT_DEVICE_DATA,
                                 GFP_KERNEL);
        if (IS_ERR(rsp))
                goto err_rsp;

        rsp->ah = ah;

        dm_mad = rsp->mad;
        memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof *dm_mad);
        dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
        dm_mad->mad_hdr.status = 0;

        switch (mad_wc->recv_buf.mad->mad_hdr.method) {
        case IB_MGMT_METHOD_GET:
                srpt_mgmt_method_get(sport, mad_wc->recv_buf.mad, dm_mad);
                break;
        case IB_MGMT_METHOD_SET:
                dm_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
                break;
        default:
                dm_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD);
                break;
        }

        if (!ib_post_send_mad(rsp, NULL)) {
                ib_free_recv_mad(mad_wc);
                /* will destroy_ah & free_send_mad in send completion */
                return;
        }

        ib_free_send_mad(rsp);

err_rsp:
        ib_destroy_ah(ah);
err:
        ib_free_recv_mad(mad_wc);
}

/*
 * Enable InfiniBand management datagram processing, update the cached sm_lid,
 * lid and gid values, and register a callback function for processing MADs
 * on the specified port. It is safe to call this function more than once for
 * the same port.
 */
static int srpt_refresh_port(struct srpt_port *sport)
{
        struct ib_mad_reg_req reg_req;
        struct ib_port_modify port_modify;
        struct ib_port_attr port_attr;
        int ret;

        memset(&port_modify, 0, sizeof port_modify);
        port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
        port_modify.clr_port_cap_mask = 0;

        ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
        if (ret)
                goto err_mod_port;

        ret = ib_query_port(sport->sdev->device, sport->port, &port_attr);
        if (ret)
                goto err_query_port;

        sport->sm_lid = port_attr.sm_lid;
        sport->lid = port_attr.lid;

        ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid);
        if (ret)
                goto err_query_port;

        if (!sport->mad_agent) {
                memset(&reg_req, 0, sizeof reg_req);
                reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
                reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION;
                set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
                set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);

                sport->mad_agent = ib_register_mad_agent(sport->sdev->device,
                                                         sport->port,
                                                         IB_QPT_GSI,
                                                         &reg_req, 0,
                                                         srpt_mad_send_handler,
                                                         srpt_mad_recv_handler,
                                                         sport);
                if (IS_ERR(sport->mad_agent)) {
                        ret = PTR_ERR(sport->mad_agent);
                        sport->mad_agent = NULL;
                        goto err_query_port;
                }
        }

        return 0;

err_query_port:

        port_modify.set_port_cap_mask = 0;
        port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
        ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);

err_mod_port:

        return ret;
}

/*
 * Unregister the callback function for processing MADs and disable MAD
 * processing for all ports of the specified device. It is safe to call this
 * function more than once for the same device.
 */
static void srpt_unregister_mad_agent(struct srpt_device *sdev)
{
        struct ib_port_modify port_modify = {
                .clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP,
        };
        struct srpt_port *sport;
        int i;

        for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
                sport = &sdev->port[i - 1];
                WARN_ON(sport->port != i);
                if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0)
                        printk(KERN_ERR PFX "disabling MAD processing"
                               " failed.\n");
                if (sport->mad_agent) {
                        ib_unregister_mad_agent(sport->mad_agent);
                        sport->mad_agent = NULL;
                }
        }
}

/*
 * Allocate and initialize an SRPT I/O context structure.
 */
static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev)
{
        struct srpt_ioctx *ioctx;

        ioctx = kmalloc(sizeof *ioctx, GFP_KERNEL);
        if (!ioctx)
                goto out;

        ioctx->buf = kzalloc(MAX_MESSAGE_SIZE, GFP_KERNEL);
        if (!ioctx->buf)
                goto out_free_ioctx;

        ioctx->dma = dma_map_single(sdev->device->dma_device, ioctx->buf,
                                    MAX_MESSAGE_SIZE, DMA_BIDIRECTIONAL);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)
        if (dma_mapping_error(sdev->device->dma_device, ioctx->dma))
#else
        if (dma_mapping_error(ioctx->dma))
#endif
                goto out_free_buf;

        return ioctx;

out_free_buf:
        kfree(ioctx->buf);
out_free_ioctx:
        kfree(ioctx);
out:
        return NULL;
}

/*
 * Deallocate an SRPT I/O context structure.
 */
static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
{
        if (!ioctx)
                return;

        dma_unmap_single(sdev->device->dma_device, ioctx->dma,
                         MAX_MESSAGE_SIZE, DMA_BIDIRECTIONAL);
        kfree(ioctx->buf);
        kfree(ioctx);
}

/*
 * Associate a ring of SRPT I/O context structures with the specified device.
 */
static int srpt_alloc_ioctx_ring(struct srpt_device *sdev)
{
        int i;

        for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
                sdev->ioctx_ring[i] = srpt_alloc_ioctx(sdev);

                if (!sdev->ioctx_ring[i])
                        goto err;

                sdev->ioctx_ring[i]->index = i;
        }

        return 0;

err:
        while (--i >= 0) {
                srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
                sdev->ioctx_ring[i] = NULL;
        }
        return -ENOMEM;
}

/* Free the ring of SRPT I/O context structures. */
static void srpt_free_ioctx_ring(struct srpt_device *sdev)
{
        int i;

        for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
                srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
                sdev->ioctx_ring[i] = NULL;
        }
}

/*
 * Post a receive request on the SRQ (shared receive queue) of InfiniBand
 * device 'sdev'.
 */
static int srpt_post_recv(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
{
        struct ib_sge list;
        struct ib_recv_wr wr, *bad_wr;

        wr.wr_id = ioctx->index | SRPT_OP_RECV;

        list.addr = ioctx->dma;
        list.length = MAX_MESSAGE_SIZE;
        list.lkey = sdev->mr->lkey;

        wr.next = NULL;
        wr.sg_list = &list;
        wr.num_sge = 1;

        return ib_post_srq_recv(sdev->srq, &wr, &bad_wr);
}

/*
 * Post a send request on the SRPT RDMA channel 'ch'.
 */
static int srpt_post_send(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
                          int len)
{
        struct ib_sge list;
        struct ib_send_wr wr, *bad_wr;
        struct srpt_device *sdev = ch->sport->sdev;

        dma_sync_single_for_device(sdev->device->dma_device, ioctx->dma,
                                   MAX_MESSAGE_SIZE, DMA_TO_DEVICE);

        list.addr = ioctx->dma;
        list.length = len;
        list.lkey = sdev->mr->lkey;

        wr.next = NULL;
        wr.wr_id = ioctx->index;
        wr.sg_list = &list;
        wr.num_sge = 1;
        wr.opcode = IB_WR_SEND;
        wr.send_flags = IB_SEND_SIGNALED;

        return ib_post_send(ch->qp, &wr, &bad_wr);
}

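/*
 * Extract the data buffer descriptors from SRP_CMD request 'srp_cmd' into
 * 'ioctx'. Sets *ind to one if the command references more indirect
 * descriptors than were transmitted inside the request itself, in which case
 * the descriptor list is not processed.
 */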
static int srpt_get_desc_tbl(struct srpt_ioctx *ioctx, struct srp_cmd *srp_cmd,
                             int *ind)
{
        struct srp_indirect_buf *idb;
        struct srp_direct_buf *db;

        *ind = 0;
        if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) ||
            ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) {
                ioctx->n_rbuf = 1;
                ioctx->rbufs = &ioctx->single_rbuf;

                db = (void *)srp_cmd->add_data;
                memcpy(ioctx->rbufs, db, sizeof *db);
                ioctx->data_len = be32_to_cpu(db->len);
        } else {
                idb = (void *)srp_cmd->add_data;

                ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof *db;

                if (ioctx->n_rbuf >
                    (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
                        *ind = 1;
                        ioctx->n_rbuf = 0;
                        goto out;
                }

                if (ioctx->n_rbuf == 1)
                        ioctx->rbufs = &ioctx->single_rbuf;
                else
                        ioctx->rbufs =
                                kmalloc(ioctx->n_rbuf * sizeof *db, GFP_ATOMIC);
                if (!ioctx->rbufs) {
                        ioctx->n_rbuf = 0;
                        return -ENOMEM;
                }

                db = idb->desc_list;
                memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof *db);
                ioctx->data_len = be32_to_cpu(idb->len);
        }
out:
        return 0;
}

/*
 * Modify the attributes of queue pair 'qp': allow local write, remote read,
 * and remote write. Also transition 'qp' to state IB_QPS_INIT.
 */
static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
{
        struct ib_qp_attr *attr;
        int ret;

        attr = kzalloc(sizeof *attr, GFP_KERNEL);
        if (!attr)
                return -ENOMEM;

        attr->qp_state = IB_QPS_INIT;
        attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
            IB_ACCESS_REMOTE_WRITE;
        attr->port_num = ch->sport->port;
        attr->pkey_index = 0;

        ret = ib_modify_qp(qp, attr,
                           IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PORT |
                           IB_QP_PKEY_INDEX);

        kfree(attr);
        return ret;
}

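/*
 * Transition queue pair 'qp' of channel 'ch' to state 'qp_state' (RTR or
 * RTS), using ib_cm_init_qp_attr() to fill in the path-dependent attributes.
 */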
static int srpt_ch_qp_rtr_rts(struct srpt_rdma_ch *ch, struct ib_qp *qp,
                              enum ib_qp_state qp_state)
{
        struct ib_qp_attr *qp_attr;
        int attr_mask;
        int ret;

        qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
        if (!qp_attr)
                return -ENOMEM;

        qp_attr->qp_state = qp_state;
        ret = ib_cm_init_qp_attr(ch->cm_id, qp_attr, &attr_mask);
        if (ret)
                goto out;

        if (qp_state == IB_QPS_RTR)
                qp_attr->max_dest_rd_atomic = 4;
        else
                qp_attr->max_rd_atomic = 4;

        ret = ib_modify_qp(qp, qp_attr, attr_mask);

out:
        kfree(qp_attr);
        return ret;
}

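/*
 * Release the data buffer and RDMA resources associated with I/O context
 * 'ioctx' and repost it on the SRQ of the device associated with channel
 * 'ch'.
 */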
static void srpt_reset_ioctx(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
{
        int i;

        if (ioctx->n_rdma_ius > 0 && ioctx->rdma_ius) {
                struct rdma_iu *riu = ioctx->rdma_ius;

                for (i = 0; i < ioctx->n_rdma_ius; ++i, ++riu)
                        kfree(riu->sge);
                kfree(ioctx->rdma_ius);
        }

        if (ioctx->n_rbuf > 1)
                kfree(ioctx->rbufs);

        /* TODO: on failure, queue the ioctx back onto a free_ioctx queue. */
        if (srpt_post_recv(ch->sport->sdev, ioctx))
                printk(KERN_ERR PFX "SRQ post_recv failed - this is serious\n");
        else
                atomic_inc(&ch->req_lim_delta);
}

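/*
 * Handle a work completion that finished with an error status: if an SCST
 * command is associated with the I/O context, unmap its buffers and finish
 * it; otherwise repost the I/O context on the SRQ.
 */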
static void srpt_handle_err_comp(struct srpt_rdma_ch *ch, struct ib_wc *wc)
{
        struct srpt_ioctx *ioctx;
        struct srpt_device *sdev = ch->sport->sdev;
        scst_data_direction dir = SCST_DATA_NONE;

        if (wc->wr_id & SRPT_OP_RECV) {
                ioctx = sdev->ioctx_ring[wc->wr_id & ~SRPT_OP_RECV];
                printk(KERN_ERR PFX "This is serious - SRQ is in bad state\n");
        } else {
                ioctx = sdev->ioctx_ring[wc->wr_id];

                if (ioctx->scmnd) {
                        struct scst_cmd *scmnd = ioctx->scmnd;

                        dir = scst_cmd_get_data_direction(scmnd);

                        if (dir == SCST_DATA_NONE)
                                scst_tgt_cmd_done(scmnd,
                                        scst_estimate_context());
                        else {
                                dma_unmap_sg(sdev->device->dma_device,
                                             scst_cmd_get_sg(scmnd),
                                             scst_cmd_get_sg_cnt(scmnd),
                                             scst_to_tgt_dma_dir(dir));

                                if (scmnd->state == SCST_CMD_STATE_DATA_WAIT)
                                        scst_rx_data(scmnd,
                                                     SCST_RX_STATUS_ERROR,
                                                     SCST_CONTEXT_THREAD);
                                else if (scmnd->state == SCST_CMD_STATE_XMIT_WAIT)
                                        scst_tgt_cmd_done(scmnd,
                                                scst_estimate_context());
                        }
                } else
                        srpt_reset_ioctx(ch, ioctx);
        }
}

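/*
 * Process a successful send completion: unmap the data buffers of the
 * associated SCST command, if any, and tell SCST that command processing has
 * finished; otherwise repost the I/O context on the SRQ.
 */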
static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
                                  struct srpt_ioctx *ioctx,
                                  enum scst_exec_context context)
{
        if (ioctx->scmnd) {
                scst_data_direction dir =
                        scst_cmd_get_data_direction(ioctx->scmnd);

                if (dir != SCST_DATA_NONE)
                        dma_unmap_sg(ch->sport->sdev->device->dma_device,
                                     scst_cmd_get_sg(ioctx->scmnd),
                                     scst_cmd_get_sg_cnt(ioctx->scmnd),
                                     scst_to_tgt_dma_dir(dir));

                scst_tgt_cmd_done(ioctx->scmnd, context);
        } else
                srpt_reset_ioctx(ch, ioctx);
}

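/*
 * Process an RDMA completion: for a WRITE command, tell SCST that the data
 * transfer from the initiator has finished.
 */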
static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
                                  struct srpt_ioctx *ioctx)
{
        if (!ioctx->scmnd) {
                srpt_reset_ioctx(ch, ioctx);
                return;
        }

        if (scst_cmd_get_data_direction(ioctx->scmnd) == SCST_DATA_WRITE)
                scst_rx_data(ioctx->scmnd, SCST_RX_STATUS_SUCCESS,
                        scst_estimate_context());
}

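/*
 * Build an SRP_RSP response IU in ioctx->buf for the command with tag 'tag'.
 * If 's_key' is not NO_SENSE, a CHECK CONDITION status is reported together
 * with fixed-format sense data carrying sense key 's_key' and ASC/ASCQ
 * 's_code'.
 */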
static void srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
                               struct srpt_ioctx *ioctx, u8 s_key, u8 s_code,
                               u64 tag)
{
        struct srp_rsp *srp_rsp;
        struct sense_data *sense;
        int limit_delta;

        srp_rsp = ioctx->buf;
        memset(srp_rsp, 0, sizeof *srp_rsp);

        limit_delta = atomic_read(&ch->req_lim_delta);
        atomic_sub(limit_delta, &ch->req_lim_delta);

        srp_rsp->opcode = SRP_RSP;
        srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
        srp_rsp->tag = tag;

        if (s_key != NO_SENSE) {
                srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
                srp_rsp->status = SAM_STAT_CHECK_CONDITION;
                srp_rsp->sense_data_len =
                    cpu_to_be32(sizeof *sense + (sizeof *sense % 4));

                sense = (struct sense_data *)(srp_rsp + 1);
                sense->err_code = 0x70;
                sense->key = s_key;
                sense->asc_ascq = s_code;
        }
}

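/*
 * Build an SRP_RSP response IU in ioctx->buf for the task management request
 * with tag 'tag'. If 'rsp_code' is not SRP_TSK_MGMT_SUCCESS, four bytes of
 * response data carrying 'rsp_code' are appended.
 */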
static void srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
                                   struct srpt_ioctx *ioctx, u8 rsp_code,
                                   u64 tag)
{
        struct srp_rsp *srp_rsp;
        int limit_delta;

        dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
                                MAX_MESSAGE_SIZE, DMA_TO_DEVICE);

        srp_rsp = ioctx->buf;
        memset(srp_rsp, 0, sizeof *srp_rsp);

        limit_delta = atomic_read(&ch->req_lim_delta);
        atomic_sub(limit_delta, &ch->req_lim_delta);

        srp_rsp->opcode = SRP_RSP;
        srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
        srp_rsp->tag = tag;

        if (rsp_code != SRP_TSK_MGMT_SUCCESS) {
                srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID;
                srp_rsp->resp_data_len = cpu_to_be32(4);
                srp_rsp->data[3] = rsp_code;
        }
}

/*
 * Process SRP_CMD.
 */
static int srpt_handle_cmd(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
{
        struct scst_cmd *scmnd = NULL;
        struct srp_cmd *srp_cmd = NULL;
        scst_data_direction dir = SCST_DATA_NONE;
        int indirect_desc = 0;
        int ret;

        srp_cmd = ioctx->buf;

        if (srp_cmd->buf_fmt) {
                ret = srpt_get_desc_tbl(ioctx, srp_cmd, &indirect_desc);
                if (ret) {
                        srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
                                           NO_ADD_SENSE, srp_cmd->tag);
                        ((struct srp_rsp *)ioctx->buf)->status =
                                        SAM_STAT_TASK_SET_FULL;
                        goto send_rsp;
                }

                if (indirect_desc) {
                        srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
                                           NO_ADD_SENSE, srp_cmd->tag);
                        ((struct srp_rsp *)ioctx->buf)->status =
                                        SAM_STAT_TASK_SET_FULL;
                        goto send_rsp;
                }

                if (srp_cmd->buf_fmt & 0xf)
                        dir = SCST_DATA_READ;
                else if (srp_cmd->buf_fmt >> 4)
                        dir = SCST_DATA_WRITE;
                else
                        dir = SCST_DATA_NONE;
        } else
                dir = SCST_DATA_NONE;

        scmnd = scst_rx_cmd(ch->scst_sess, (u8 *) &srp_cmd->lun,
                            sizeof srp_cmd->lun, srp_cmd->cdb, 16,
                            thread ? SCST_NON_ATOMIC : SCST_ATOMIC);
        if (!scmnd) {
                srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
                                   NO_ADD_SENSE, srp_cmd->tag);
                ((struct srp_rsp *)ioctx->buf)->status =
                        SAM_STAT_TASK_SET_FULL;
                goto send_rsp;
        }

        ioctx->scmnd = scmnd;

        switch (srp_cmd->task_attr) {
        case SRP_CMD_HEAD_OF_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_HEAD_OF_QUEUE;
                break;
        case SRP_CMD_ORDERED_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
                break;
        case SRP_CMD_SIMPLE_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_SIMPLE;
                break;
        case SRP_CMD_ACA:
                scmnd->queue_type = SCST_CMD_QUEUE_ACA;
                break;
        default:
                scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
                break;
        }

        scst_cmd_set_tag(scmnd, srp_cmd->tag);
        scst_cmd_set_tgt_priv(scmnd, ioctx);
        scst_cmd_set_expected(scmnd, dir, ioctx->data_len);

        spin_lock_irq(&ch->spinlock);
        list_add_tail(&ioctx->scmnd_list, &ch->active_scmnd_list);
        ch->active_scmnd_cnt++;
        spin_unlock_irq(&ch->spinlock);

        scst_cmd_init_done(scmnd, scst_estimate_context());

        return 0;

send_rsp:
        return -1;
}

/*
 * Process SRP_TSK_MGMT. See also table 19 in the T10 SRP r16a document.
 */
static int srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
                                struct srpt_ioctx *ioctx)
{
        struct srp_tsk_mgmt *srp_tsk = NULL;
        struct srpt_mgmt_ioctx *mgmt_ioctx;
        int ret;

        srp_tsk = ioctx->buf;

        printk(KERN_WARNING PFX
               "recv_tsk_mgmt= %d for task_tag= %lld"
               " using tag= %lld cm_id= %p sess= %p\n",
               srp_tsk->tsk_mgmt_func,
               (unsigned long long) srp_tsk->task_tag,
               (unsigned long long) srp_tsk->tag,
               ch->cm_id, ch->scst_sess);

        mgmt_ioctx = kmalloc(sizeof *mgmt_ioctx, GFP_ATOMIC);
        if (!mgmt_ioctx) {
                srpt_build_tskmgmt_rsp(ch, ioctx, SRP_TSK_MGMT_FAILED,
                                       srp_tsk->tag);
                goto send_rsp;
        }

        mgmt_ioctx->ioctx = ioctx;
        mgmt_ioctx->ch = ch;
        mgmt_ioctx->tag = srp_tsk->tag;

        switch (srp_tsk->tsk_mgmt_func) {
        case SRP_TSK_ABORT_TASK:
                ret = scst_rx_mgmt_fn_tag(ch->scst_sess,
                                          SCST_ABORT_TASK,
                                          srp_tsk->task_tag,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_ABORT_TASK_SET:
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_ABORT_TASK_SET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_CLEAR_TASK_SET:
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_CLEAR_TASK_SET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
#if 0
        case SRP_TSK_LUN_RESET:
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_LUN_RESET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
#endif
        case SRP_TSK_CLEAR_ACA:
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_CLEAR_ACA,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        default:
                srpt_build_tskmgmt_rsp(ch, ioctx,
                                       SRP_TSK_MGMT_FUNC_NOT_SUPP,
                                       srp_tsk->tag);
                goto send_rsp;
        }
        return 0;

send_rsp:
        return -1;
}

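/*
 * Process a newly received information unit (IU): dispatch SRP_CMD and
 * SRP_TSK_MGMT IUs to their handlers and reply to anything else with
 * ILLEGAL REQUEST sense data. IUs that arrive while the channel is still
 * connecting are queued on the channel's command wait list.
 */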
static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
                               struct srpt_ioctx *ioctx)
{
        u8 op;

        if (ch->state != RDMA_CHANNEL_LIVE) {
                if (ch->state == RDMA_CHANNEL_CONNECTING) {
                        spin_lock_irq(&ch->spinlock);
                        list_add_tail(&ioctx->wait_list, &ch->cmd_wait_list);
                        spin_unlock_irq(&ch->spinlock);
                } else
                        srpt_reset_ioctx(ch, ioctx);

                return;
        }

        dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
                                MAX_MESSAGE_SIZE, DMA_FROM_DEVICE);

        ioctx->data_len = 0;
        ioctx->n_rbuf = 0;
        ioctx->rbufs = NULL;
        ioctx->n_rdma = 0;
        ioctx->n_rdma_ius = 0;
        ioctx->rdma_ius = NULL;
        ioctx->scmnd = NULL;

        op = *(u8 *) ioctx->buf;
        switch (op) {
        case SRP_CMD:
                if (srpt_handle_cmd(ch, ioctx) < 0)
                        goto send_rsp;
                break;

        case SRP_TSK_MGMT:
                if (srpt_handle_tsk_mgmt(ch, ioctx) < 0)
                        goto send_rsp;
                break;

        case SRP_I_LOGOUT:
        case SRP_AER_REQ:
        default:
                srpt_build_cmd_rsp(ch, ioctx, ILLEGAL_REQUEST, INVALID_CDB,
                                   ((struct srp_cmd *)ioctx->buf)->tag);

                goto send_rsp;
        }

        dma_sync_single_for_device(ch->sport->sdev->device->dma_device,
                                   ioctx->dma, MAX_MESSAGE_SIZE,
                                   DMA_FROM_DEVICE);

        return;

send_rsp:
        if (ch->state != RDMA_CHANNEL_LIVE ||
            srpt_post_send(ch, ioctx,
                           sizeof(struct srp_rsp) +
                           be32_to_cpu(((struct srp_rsp *)ioctx->buf)->
                                       sense_data_len)))
                srpt_reset_ioctx(ch, ioctx);
}

/*
 * Returns true if the ioctx list is non-empty or if the ib_srpt kernel thread
 * should stop.
 * @pre thread != 0
 */
static inline int srpt_test_ioctx_list(void)
{
        int res = (!list_empty(&srpt_thread.thread_ioctx_list) ||
                   unlikely(kthread_should_stop()));
        return res;
}

/*
 * Add 'ioctx' to the tail of the ioctx list and wake up the kernel thread.
 *
 * @pre thread != 0
 */
static inline void srpt_schedule_thread(struct srpt_ioctx *ioctx)
{
        unsigned long flags;

        spin_lock_irqsave(&srpt_thread.thread_lock, flags);
        list_add_tail(&ioctx->comp_list, &srpt_thread.thread_ioctx_list);
        spin_unlock_irqrestore(&srpt_thread.thread_lock, flags);
        wake_up(&ioctx_list_waitQ);
}

/*
 * InfiniBand completion queue callback: re-arm completion notification and
 * process all queued work completions, either inline or by handing them off
 * to the SRPT kernel thread.
 */
static void srpt_completion(struct ib_cq *cq, void *ctx)
{
        struct srpt_rdma_ch *ch = ctx;
        struct srpt_device *sdev = ch->sport->sdev;
        struct ib_wc wc;
        struct srpt_ioctx *ioctx;

        ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
        while (ib_poll_cq(ch->cq, 1, &wc) > 0) {
                if (wc.status) {
                        printk(KERN_ERR PFX "failed %s status= %d\n",
                               wc.wr_id & SRPT_OP_RECV ? "receive" : "send",
                               wc.status);
                        srpt_handle_err_comp(ch, &wc);
                        break;
                }

                if (wc.wr_id & SRPT_OP_RECV) {
                        ioctx = sdev->ioctx_ring[wc.wr_id & ~SRPT_OP_RECV];
                        if (thread) {
                                ioctx->ch = ch;
                                ioctx->op = IB_WC_RECV;
                                srpt_schedule_thread(ioctx);
                        } else
                                srpt_handle_new_iu(ch, ioctx);
                        continue;
                } else
                        ioctx = sdev->ioctx_ring[wc.wr_id];

                if (thread) {
                        ioctx->ch = ch;
                        ioctx->op = wc.opcode;
                        srpt_schedule_thread(ioctx);
                } else {
                        switch (wc.opcode) {
                        case IB_WC_SEND:
                                srpt_handle_send_comp(ch, ioctx,
                                        scst_estimate_context());
                                break;
                        case IB_WC_RDMA_WRITE:
                        case IB_WC_RDMA_READ:
                                srpt_handle_rdma_comp(ch, ioctx);
                                break;
                        default:
                                break;
                        }
                }
        }
}

/*
 * Create a completion queue and a queue pair for RDMA channel 'ch' and bring
 * the queue pair into the INIT state.
 */
static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
{
        struct ib_qp_init_attr *qp_init;
        struct srpt_device *sdev = ch->sport->sdev;
        int cqe;
        int ret;

        qp_init = kzalloc(sizeof *qp_init, GFP_KERNEL);
        if (!qp_init)
                return -ENOMEM;

        /* Create a completion queue (CQ). */

        cqe = SRPT_RQ_SIZE + SRPT_SQ_SIZE - 1;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(RHEL_RELEASE_CODE)
        ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe);
#else
        ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe, 0);
#endif
        if (IS_ERR(ch->cq)) {
                ret = PTR_ERR(ch->cq);
                printk(KERN_ERR PFX "failed to create_cq cqe= %d ret= %d\n",
                        cqe, ret);
                goto out;
        }

        /* Request completion notification. */

        ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);

        /* Create a queue pair (QP). */

        qp_init->qp_context = (void *)ch;
        qp_init->event_handler = srpt_qp_event;
        qp_init->send_cq = ch->cq;
        qp_init->recv_cq = ch->cq;
        qp_init->srq = sdev->srq;
        qp_init->sq_sig_type = IB_SIGNAL_REQ_WR;
        qp_init->qp_type = IB_QPT_RC;
        qp_init->cap.max_send_wr = SRPT_SQ_SIZE;
        qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE;

        ch->qp = ib_create_qp(sdev->pd, qp_init);
        if (IS_ERR(ch->qp)) {
                ret = PTR_ERR(ch->qp);
                ib_destroy_cq(ch->cq);
                printk(KERN_ERR PFX "failed to create_qp ret= %d\n", ret);
                goto out;
        }

        printk(KERN_DEBUG PFX "%s: max_cqe= %d max_sge= %d cm_id= %p\n",
               __func__, ch->cq->cqe, qp_init->cap.max_send_sge,
               ch->cm_id);

        /* Modify the attributes and the state of queue pair ch->qp. */

        ret = srpt_init_ch_qp(ch, ch->qp);
        if (ret) {
                ib_destroy_qp(ch->qp);
                ib_destroy_cq(ch->cq);
                goto out;
        }

        atomic_set(&ch->req_lim_delta, SRPT_RQ_SIZE);
out:
        kfree(qp_init);
        return ret;
}

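/*
 * Look up the RDMA channel that has been associated with InfiniBand CM ID
 * 'cm_id'. Returns NULL if no such channel exists.
 */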
static struct srpt_rdma_ch *srpt_find_channel(struct ib_cm_id *cm_id)
{
        struct srpt_device *sdev = cm_id->context;
        struct srpt_rdma_ch *ch, *tmp_ch;

        spin_lock_irq(&sdev->spinlock);
        list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
                if (ch->cm_id == cm_id) {
                        spin_unlock_irq(&sdev->spinlock);
                        return ch;
                }
        }

        spin_unlock_irq(&sdev->spinlock);

        return NULL;
}

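/*
 * Release all resources associated with RDMA channel 'ch': remove it from
 * the per-device channel list, destroy the CM ID (if 'destroy_cmid' is
 * nonzero), the QP and the CQ, and unregister the SCST session.
 */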
static int srpt_release_channel(struct srpt_rdma_ch *ch, int destroy_cmid)
{
        spin_lock_irq(&ch->sport->sdev->spinlock);
        list_del(&ch->list);
        spin_unlock_irq(&ch->sport->sdev->spinlock);

        if (ch->cm_id && destroy_cmid) {
                printk(KERN_WARNING PFX
                       "%s: destroy cm_id= %p\n", __func__, ch->cm_id);
                ib_destroy_cm_id(ch->cm_id);
                ch->cm_id = NULL;
        }

        ib_destroy_qp(ch->qp);
        ib_destroy_cq(ch->cq);

        if (ch->scst_sess) {
                struct srpt_ioctx *ioctx, *ioctx_tmp;

                printk(KERN_WARNING PFX
                       "%s: release sess= %p sess_name= %s active_cmd= %d\n",
                       __func__, ch->scst_sess, ch->sess_name,
                       ch->active_scmnd_cnt);

                list_for_each_entry_safe(ioctx, ioctx_tmp,
                                         &ch->active_scmnd_list, scmnd_list) {
                        list_del(&ioctx->scmnd_list);
                        ch->active_scmnd_cnt--;
                }

                scst_unregister_session(ch->scst_sess, 0, NULL);
                ch->scst_sess = NULL;
        }

        kfree(ch);

        return destroy_cmid ? 0 : 1;
}

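/*
 * Mark channel 'ch' as disconnecting and send a CM DREQ (if 'dreq' is
 * nonzero) or a CM DREP to the initiator.
 */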
static int srpt_disconnect_channel(struct srpt_rdma_ch *ch, int dreq)
{
        spin_lock_irq(&ch->spinlock);
        ch->state = RDMA_CHANNEL_DISCONNECTING;
        spin_unlock_irq(&ch->spinlock);

        if (dreq)
                ib_send_cm_dreq(ch->cm_id, NULL, 0);
        else
                ib_send_cm_drep(ch->cm_id, NULL, 0);

        return 0;
}

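/*
 * Process an IB CM REQ, i.e. an SRP login request: validate the request,
 * allocate and set up a new RDMA channel, and either accept the login with a
 * CM REP or reject it with a CM REJ.
 */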
1369 static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
1370                             struct ib_cm_req_event_param *param,
1371                             void *private_data)
1372 {
1373         struct srpt_device *sdev = cm_id->context;
1374         struct srp_login_req *req;
1375         struct srp_login_rsp *rsp;
1376         struct srp_login_rej *rej;
1377         struct ib_cm_rep_param *rep_param;
1378         struct srpt_rdma_ch *ch, *tmp_ch;
1379         u32 it_iu_len;
1380         int ret = 0;
1381
1382         if (!sdev || !private_data)
1383                 return -EINVAL;
1384
1385         rsp = kzalloc(sizeof *rsp, GFP_KERNEL);
1386         rej = kzalloc(sizeof *rej, GFP_KERNEL);
1387         rep_param = kzalloc(sizeof *rep_param, GFP_KERNEL);
1388
1389         if (!rsp || !rej || !rep_param) {
1390                 ret = -ENOMEM;
1391                 goto out;
1392         }
1393
1394         req = (struct srp_login_req *)private_data;
1395
1396         it_iu_len = be32_to_cpu(req->req_it_iu_len);
1397
1398         printk(KERN_DEBUG PFX
1399                "Host login i_port_id=0x%llx:0x%llx t_port_id=0x%llx:0x%llx"
1400                " it_iu_len=%d\n",
1401                (unsigned long long)
1402                be64_to_cpu(*(u64 *)&req->initiator_port_id[0]),
1403                (unsigned long long)
1404                be64_to_cpu(*(u64 *)&req->initiator_port_id[8]),
1405                (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[0]),
1406                (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[8]),
1407                it_iu_len);
1408
1409         if (it_iu_len > MAX_MESSAGE_SIZE || it_iu_len < 64) {
1410                 rej->reason =
1411                     cpu_to_be32(SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
1412                 ret = -EINVAL;
1413                 printk(KERN_WARNING PFX
1414                        "Reject invalid it_iu_len=%d\n", it_iu_len);
1415                 goto reject;
1416         }
1417
1418         if ((req->req_flags & 0x3) == SRP_MULTICHAN_SINGLE) {
1419                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;
1420
1421                 spin_lock_irq(&sdev->spinlock);
1422
1423                 list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
1424                         if (!memcmp(ch->i_port_id, req->initiator_port_id, 16)
1425                             && !memcmp(ch->t_port_id, req->target_port_id, 16)
1426                             && param->port == ch->sport->port
1427                             && param->listen_id == ch->sport->sdev->cm_id
1428                             && ch->cm_id) {
1429                                 /* found an existing channel */
1430                                 printk(KERN_WARNING PFX
1431                                        "Found existing channel name= %s"
1432                                        " cm_id= %p state= %d\n",
1433                                        ch->sess_name, ch->cm_id, ch->state);
1434
1435                                 spin_unlock_irq(&sdev->spinlock);
1436
1437                                 rsp->rsp_flags =
1438                                     SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
1439
1440                                 if (ch->state == RDMA_CHANNEL_LIVE)
1441                                         srpt_disconnect_channel(ch, 1);
1442                                 else if (ch->state == RDMA_CHANNEL_CONNECTING) {
1443                                         ib_send_cm_rej(ch->cm_id,
1444                                                        IB_CM_REJ_NO_RESOURCES,
1445                                                        NULL, 0, NULL, 0);
1446                                         srpt_release_channel(ch, 1);
1447                                 }
1448
1449                                 spin_lock_irq(&sdev->spinlock);
1450                         }
1451                 }
1452
1453                 spin_unlock_irq(&sdev->spinlock);
1454
1455         } else
1456                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
1457
1458         if (((u64) (*(u64 *) req->target_port_id) !=
1459              cpu_to_be64(mellanox_ioc_guid)) ||
1460             ((u64) (*(u64 *) (req->target_port_id + 8)) !=
1461              cpu_to_be64(mellanox_ioc_guid))) {
1462                 rej->reason =
1463                     cpu_to_be32(SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
1464                 ret = -ENOMEM;
1465                 printk(KERN_WARNING PFX "Reject invalid target_port_id\n");
1466                 goto reject;
1467         }
1468
1469         ch = kzalloc(sizeof *ch, GFP_KERNEL);
1470         if (!ch) {
1471                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1472                 printk(KERN_WARNING PFX "Reject failed allocate rdma_ch\n");
1473                 ret = -ENOMEM;
1474                 goto reject;
1475         }
1476
1477         spin_lock_init(&ch->spinlock);
1478         memcpy(ch->i_port_id, req->initiator_port_id, 16);
1479         memcpy(ch->t_port_id, req->target_port_id, 16);
1480         ch->sport = &sdev->port[param->port - 1];
1481         ch->cm_id = cm_id;
1482         ch->state = RDMA_CHANNEL_CONNECTING;
1483         INIT_LIST_HEAD(&ch->cmd_wait_list);
1484         INIT_LIST_HEAD(&ch->active_scmnd_list);
1485
1486         ret = srpt_create_ch_ib(ch);
1487         if (ret) {
1488                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1489                 printk(KERN_WARNING PFX "Reject failed to create rdma_ch\n");
1490                 goto free_ch;
1491         }
1492
1493         ret = srpt_ch_qp_rtr_rts(ch, ch->qp, IB_QPS_RTR);
1494         if (ret) {
1495                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1496                 printk(KERN_WARNING PFX
1497                        "Reject failed to modify qp to rtr ret=%d\n", ret);
1498                 goto destroy_ib;
1499         }
1500
1501         sprintf(ch->sess_name, "0x%016llx%016llx",
1502                 (unsigned long long)be64_to_cpu(*(u64 *)ch->i_port_id),
1503                 (unsigned long long)be64_to_cpu(*(u64 *)(ch->i_port_id + 8)));
1504
1505         BUG_ON(!sdev->scst_tgt);
1506         ch->scst_sess = scst_register_session(sdev->scst_tgt, 0, ch->sess_name,
1507                                   NULL, NULL);
1508         if (!ch->scst_sess) {
1509                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1510                 printk(KERN_WARNING PFX "Failed to create scst sess\n");
1511                 goto destroy_ib;
1512         }
1513
1514         spin_lock_irq(&sdev->spinlock);
1515         list_add_tail(&ch->list, &sdev->rch_list);
1516         spin_unlock_irq(&sdev->spinlock);
1517
1518         printk(KERN_DEBUG PFX "Establishing connection sess=%p name=%s cm_id=%p\n",
1519                ch->scst_sess, ch->sess_name, ch->cm_id);
1520
1521         scst_sess_set_tgt_priv(ch->scst_sess, ch);
1522
1523         /* create srp_login_response */
1524         rsp->opcode = SRP_LOGIN_RSP;
1525         rsp->tag = req->tag;
1526         rsp->max_it_iu_len = req->req_it_iu_len;
1527         rsp->max_ti_iu_len = req->req_it_iu_len;
1528         rsp->buf_fmt =
1529             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
1530         rsp->req_lim_delta = cpu_to_be32(SRPT_RQ_SIZE);
1531         atomic_set(&ch->req_lim_delta, 0);
1532
1533         /* create cm reply */
1534         rep_param->qp_num = ch->qp->qp_num;
1535         rep_param->private_data = (void *)rsp;
1536         rep_param->private_data_len = sizeof *rsp;
1537         rep_param->rnr_retry_count = 7;
1538         rep_param->flow_control = 1;
1539         rep_param->failover_accepted = 0;
1540         rep_param->srq = 1;
1541         rep_param->responder_resources = 4;
1542         rep_param->initiator_depth = 4;
1543
1544         ret = ib_send_cm_rep(cm_id, rep_param);
1545         if (ret)
1546                 srpt_release_channel(ch, 0);
1547
1548         goto out;
1549
1550 destroy_ib:
1551         ib_destroy_qp(ch->qp);
1552         ib_destroy_cq(ch->cq);
1553
1554 free_ch:
1555         kfree(ch);
1556
1557 reject:
1558         rej->opcode = SRP_LOGIN_REJ;
1559         rej->tag = req->tag;
1560         rej->buf_fmt =
1561             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
1562
1563         ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
1564                              (void *)rej, sizeof *rej);
1565
1566 out:
1567         kfree(rep_param);
1568         kfree(rsp);
1569         kfree(rej);
1570
1571         return ret;
1572 }
1573
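/*
 * Look up the channel associated with the specified IB CM id and, if found,
 * release it. Returns -EINVAL if no matching channel exists.
 */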
1574 static int srpt_find_and_release_channel(struct ib_cm_id *cm_id)
1575 {
1576         struct srpt_rdma_ch *ch;
1577
1578         ch = srpt_find_channel(cm_id);
1579         if (!ch)
1580                 return -EINVAL;
1581
1582         return srpt_release_channel(ch, 0);
1583 }
1584
1585 static int srpt_cm_rej_recv(struct ib_cm_id *cm_id)
1586 {
1587         printk(KERN_DEBUG PFX "%s: cm_id=%p\n", __func__, cm_id);
1588         return srpt_find_and_release_channel(cm_id);
1589 }
1590
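/*
 * Process an IB CM RTU (ready-to-use) or user-established event: mark the
 * channel as live, bring the queue pair into the RTS state and process any
 * information units that were received while the channel was still in the
 * connecting state.
 */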
1591 static int srpt_cm_rtu_recv(struct ib_cm_id *cm_id)
1592 {
1593         struct srpt_rdma_ch *ch;
1594         int ret;
1595
1596         ch = srpt_find_channel(cm_id);
1597         if (!ch)
1598                 return -EINVAL;
1599
1600         if (ch->state == RDMA_CHANNEL_CONNECTING) {
1601                 struct srpt_ioctx *ioctx, *ioctx_tmp;
1602
1603                 spin_lock_irq(&ch->spinlock);
1604                 ch->state = RDMA_CHANNEL_LIVE;
1605                 spin_unlock_irq(&ch->spinlock);
1606                 ret = srpt_ch_qp_rtr_rts(ch, ch->qp, IB_QPS_RTS);
1607
1608                 list_for_each_entry_safe(ioctx, ioctx_tmp, &ch->cmd_wait_list,
1609                                          wait_list) {
1610                         list_del(&ioctx->wait_list);
1611                         srpt_handle_new_iu(ch, ioctx);
1612                 }
1613         } else if (ch->state == RDMA_CHANNEL_DISCONNECTING)
1614                 ret = -EAGAIN;
1615         else
1616                 ret = 0;
1617
1618         if (ret) {
1619                 printk(KERN_ERR PFX "cm_id=%p sess_name=%s state=%d\n",
1620                        cm_id, ch->sess_name, ch->state);
1621                 srpt_disconnect_channel(ch, 1);
1622         }
1623
1624         return ret;
1625 }
1626
1627 static int srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
1628 {
1629         printk(KERN_DEBUG PFX "%s: cm_id=%p\n", __func__, cm_id);
1630         return srpt_find_and_release_channel(cm_id);
1631 }
1632
1633 static int srpt_cm_rep_error(struct ib_cm_id *cm_id)
1634 {
1635         printk(KERN_DEBUG PFX "%s: cm_id=%p\n", __func__, cm_id);
1636         return srpt_find_and_release_channel(cm_id);
1637 }
1638
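/*
 * Process an IB CM DREQ (disconnection request) event by disconnecting the
 * channel, unless a disconnect is already in progress.
 */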
1639 static int srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
1640 {
1641         struct srpt_rdma_ch *ch;
1642         int ret = 0;
1643
1644         ch = srpt_find_channel(cm_id);
1645
1646         if (!ch)
1647                 return -EINVAL;
1648
1649         printk(KERN_DEBUG PFX "%s: cm_id= %p ch->state= %d\n",
1650                  __func__, cm_id, ch->state);
1651
1652         switch (ch->state) {
1653         case RDMA_CHANNEL_LIVE:
1654         case RDMA_CHANNEL_CONNECTING:
1655                 ret = srpt_disconnect_channel(ch, 0);
1656                 break;
1657         case RDMA_CHANNEL_DISCONNECTING:
1658         default:
1659                 break;
1660         }
1661
1662         return ret;
1663 }
1664
1665 static int srpt_cm_drep_recv(struct ib_cm_id *cm_id)
1666 {
1667         printk(KERN_DEBUG PFX "%s: cm_id=%p\n", __func__, cm_id);
1668         return srpt_find_and_release_channel(cm_id);
1669 }
1670
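/*
 * IB connection manager callback. Dispatches connection management events to
 * the srpt_cm_*() handlers defined above.
 */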
1671 static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
1672 {
1673         int ret = 0;
1674
1675         switch (event->event) {
1676         case IB_CM_REQ_RECEIVED:
1677                 ret = srpt_cm_req_recv(cm_id, &event->param.req_rcvd,
1678                                        event->private_data);
1679                 break;
1680         case IB_CM_REJ_RECEIVED:
1681                 ret = srpt_cm_rej_recv(cm_id);
1682                 break;
1683         case IB_CM_RTU_RECEIVED:
1684         case IB_CM_USER_ESTABLISHED:
1685                 ret = srpt_cm_rtu_recv(cm_id);
1686                 break;
1687         case IB_CM_DREQ_RECEIVED:
1688                 ret = srpt_cm_dreq_recv(cm_id);
1689                 break;
1690         case IB_CM_DREP_RECEIVED:
1691                 ret = srpt_cm_drep_recv(cm_id);
1692                 break;
1693         case IB_CM_TIMEWAIT_EXIT:
1694                 ret = srpt_cm_timewait_exit(cm_id);
1695                 break;
1696         case IB_CM_REP_ERROR:
1697                 ret = srpt_cm_rep_error(cm_id);
1698                 break;
1699         default:
1700                 break;
1701         }
1702
1703         return ret;
1704 }
1705
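/*
 * Map the scatter/gather list of a SCST command onto the ib_sge arrays of
 * the rdma_iu structures of an I/O context. A first pass computes how many
 * ib_sge entries each remote buffer descriptor needs and allocates the
 * ib_sge arrays; a second pass fills these in with the DMA addresses of the
 * scatterlist elements.
 */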
1706 static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1707                                  struct srpt_ioctx *ioctx,
1708                                  struct scst_cmd *scmnd)
1709 {
1710         struct scatterlist *scat;
1711         scst_data_direction dir;
1712         struct rdma_iu *riu;
1713         struct srp_direct_buf *db;
1714         dma_addr_t dma_addr;
1715         struct ib_sge *sge;
1716         u64 raddr;
1717         u32 rsize;
1718         u32 tsize;
1719         u32 dma_len;
1720         int count, nrdma;
1721         int i, j, k;
1722
1723         scat = scst_cmd_get_sg(scmnd);
1724         dir = scst_cmd_get_data_direction(scmnd);
1725         count = dma_map_sg(ch->sport->sdev->device->dma_device, scat,
1726                            scst_cmd_get_sg_cnt(scmnd),
1727                            scst_to_tgt_dma_dir(dir));
1728         if (unlikely(!count))
1729                 return -EBUSY;
1730
1731         if (ioctx->rdma_ius && ioctx->n_rdma_ius)
1732                 nrdma = ioctx->n_rdma_ius;
1733         else {
1734                 nrdma = (count + SRPT_DEF_SG_PER_WQE - 1) /
1735                         SRPT_DEF_SG_PER_WQE + ioctx->n_rbuf;
1736                 ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu,
1737                                           scst_cmd_atomic(scmnd)
1738                                           ? GFP_ATOMIC : GFP_KERNEL);
1739                 if (!ioctx->rdma_ius) {
1740                         dma_unmap_sg(ch->sport->sdev->device->dma_device,
1741                                      scat, scst_cmd_get_sg_cnt(scmnd),
1742                                      scst_to_tgt_dma_dir(dir));
1743                         return -ENOMEM;
1744                 }
1745
1746                 ioctx->n_rdma_ius = nrdma;
1747         }
1748
1749         db = ioctx->rbufs;
1750         tsize = (dir == SCST_DATA_READ) ?
1751                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
1752         dma_len = sg_dma_len(&scat[0]);
1753         riu = ioctx->rdma_ius;
1754
1755         /*
1756          * First pass: compute the number of ib_sge entries needed for
1757          * each remote buffer descriptor. If a descriptor fits in at most
1758          * SRPT_DEF_SG_PER_WQE ib_sge entries, a single rdma_iu (one RDMA
1759          * work request) suffices; otherwise additional rdma_iu entries
1760          * are allocated to carry the remaining ib_sge entries in further
1761          * RDMA work requests.
1762          */
1763         for (i = 0, j = 0;
1764              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
1765                 rsize = be32_to_cpu(db->len);
1766                 raddr = be64_to_cpu(db->va);
1767                 riu->raddr = raddr;
1768                 riu->rkey = be32_to_cpu(db->key);
1769                 riu->sge_cnt = 0;
1770
1771                 /* calculate how many ib_sge entries this remote buffer needs */
1772                 while (rsize > 0 && tsize > 0) {
1773
1774                         if (rsize >= dma_len) {
1775                                 tsize -= dma_len;
1776                                 rsize -= dma_len;
1777                                 raddr += dma_len;
1778
1779                                 if (tsize > 0) {
1780                                         ++j;
1781                                         if (j < count)
1782                                                 dma_len = sg_dma_len(&scat[j]);
1783                                 }
1784                         } else {
1785                                 tsize -= rsize;
1786                                 dma_len -= rsize;
1787                                 rsize = 0;
1788                         }
1789
1790                         ++riu->sge_cnt;
1791
1792                         if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) {
1793                                 riu->sge =
1794                                     kmalloc(riu->sge_cnt * sizeof *riu->sge,
1795                                             scst_cmd_atomic(scmnd)
1796                                             ? GFP_ATOMIC : GFP_KERNEL);
1797                                 if (!riu->sge)
1798                                         goto free_mem;
1799
1800                                 ++ioctx->n_rdma;
1801                                 ++riu;
1802                                 riu->sge_cnt = 0;
1803                                 riu->raddr = raddr;
1804                                 riu->rkey = be32_to_cpu(db->key);
1805                         }
1806                 }
1807
1808                 riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge,
1809                                    scst_cmd_atomic(scmnd)
1810                                    ? GFP_ATOMIC : GFP_KERNEL);
1811
1812                 if (!riu->sge)
1813                         goto free_mem;
1814
1815                 ++ioctx->n_rdma;
1816         }
1817
1818         db = ioctx->rbufs;
1819         scat = scst_cmd_get_sg(scmnd);
1820         tsize = (dir == SCST_DATA_READ) ?
1821                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
1822         riu = ioctx->rdma_ius;
1823         dma_len = sg_dma_len(&scat[0]);
1824         dma_addr = sg_dma_address(&scat[0]);
1825
1826         /* second pass: map the scatterlist addresses onto the ib_sge entries */
1827         for (i = 0, j = 0;
1828              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
1829                 rsize = be32_to_cpu(db->len);
1830                 sge = riu->sge;
1831                 k = 0;
1832
1833                 while (rsize > 0 && tsize > 0) {
1834                         sge->addr = dma_addr;
1835                         sge->lkey = ch->sport->sdev->mr->lkey;
1836
1837                         if (rsize >= dma_len) {
1838                                 sge->length =
1839                                         (tsize < dma_len) ? tsize : dma_len;
1840                                 tsize -= dma_len;
1841                                 rsize -= dma_len;
1842
1843                                 if (tsize > 0) {
1844                                         ++j;
1845                                         if (j < count) {
1846                                                 dma_len = sg_dma_len(&scat[j]);
1847                                                 dma_addr =
1848                                                     sg_dma_address(&scat[j]);
1849                                         }
1850                                 }
1851                         } else {
1852                                 sge->length = (tsize < rsize) ? tsize : rsize;
1853                                 tsize -= rsize;
1854                                 dma_len -= rsize;
1855                                 dma_addr += rsize;
1856                                 rsize = 0;
1857                         }
1858
1859                         ++k;
1860                         if (k == riu->sge_cnt && rsize > 0) {
1861                                 ++riu;
1862                                 sge = riu->sge;
1863                                 k = 0;
1864                         } else if (rsize > 0)
1865                                 ++sge;
1866                 }
1867         }
1868
1869         return 0;
1870
1871 free_mem:
1872         while (ioctx->n_rdma)
1873                 kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge);
1874
1875         kfree(ioctx->rdma_ius);
1876
1877         dma_unmap_sg(ch->sport->sdev->device->dma_device,
1878                      scat, scst_cmd_get_sg_cnt(scmnd),
1879                      scst_to_tgt_dma_dir(dir));
1880
1881         return -ENOMEM;
1882 }
1883
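/*
 * Post one RDMA work request per rdma_iu of an I/O context: RDMA writes for
 * commands that send data to the initiator and RDMA reads for commands that
 * fetch data from the initiator.
 */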
1884 static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
1885                               scst_data_direction dir)
1886 {
1887         struct ib_send_wr wr;
1888         struct ib_send_wr *bad_wr;
1889         struct rdma_iu *riu;
1890         int i;
1891         int ret = 0;
1892
1893         riu = ioctx->rdma_ius;
1894         memset(&wr, 0, sizeof wr);
1895
1896         for (i = 0; i < ioctx->n_rdma; ++i, ++riu) {
1897                 wr.opcode = (dir == SCST_DATA_READ) ?
1898                     IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
1899                 wr.next = NULL;
1900                 wr.wr_id = ioctx->index;
1901                 wr.wr.rdma.remote_addr = riu->raddr;
1902                 wr.wr.rdma.rkey = riu->rkey;
1903                 wr.num_sge = riu->sge_cnt;
1904                 wr.sg_list = riu->sge;
1905
1906                 /* only signal the last rdma wr of a write command */
1907                 if (i == (ioctx->n_rdma - 1) && dir == SCST_DATA_WRITE)
1908                         wr.send_flags = IB_SEND_SIGNALED;
1909
1910                 ret = ib_post_send(ch->qp, &wr, &bad_wr);
1911                 if (ret)
1912                         break;
1913         }
1914
1915         return ret;
1916 }
1917
1918 /*
1919  * Start data reception. Must not block.
1920  */
1921 static int srpt_xfer_data(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
1922                           struct scst_cmd *scmnd)
1923 {
1924         int ret;
1925
1926         ret = srpt_map_sg_to_ib_sge(ch, ioctx, scmnd);
1927         if (ret) {
1928                 printk(KERN_ERR PFX "%s[%d] ret=%d\n", __func__, __LINE__, ret);
1929                 ret = SCST_TGT_RES_QUEUE_FULL;
1930                 goto out;
1931         }
1932
1933         ret = srpt_perform_rdmas(ch, ioctx, scst_cmd_get_data_direction(scmnd));
1934         if (ret) {
1935                 printk(KERN_ERR PFX "%s[%d] ret=%d\n", __func__, __LINE__, ret);
1936                 if (ret == -EAGAIN || ret == -ENOMEM)
1937                         ret = SCST_TGT_RES_QUEUE_FULL;
1938                 else
1939                         ret = SCST_TGT_RES_FATAL_ERROR;
1940                 goto out;
1941         }
1942
1943         ret = SCST_TGT_RES_SUCCESS;
1944
1945 out:
1946         return ret;
1947 }
1948
1949 /*
1950  * Called by the SCST core to inform ib_srpt that data reception should start.
1951  * Must not block.
1952  */
1953 static int srpt_rdy_to_xfer(struct scst_cmd *scmnd)
1954 {
1955         struct srpt_rdma_ch *ch;
1956         struct srpt_ioctx *ioctx;
1957
1958         ioctx = scst_cmd_get_tgt_priv(scmnd);
1959         BUG_ON(!ioctx);
1960
1961         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
1962         BUG_ON(!ch);
1963
1964         if (ch->state == RDMA_CHANNEL_DISCONNECTING)
1965                 return SCST_TGT_RES_FATAL_ERROR;
1966         else if (ch->state == RDMA_CHANNEL_CONNECTING)
1967                 return SCST_TGT_RES_QUEUE_FULL;
1968
1969         return srpt_xfer_data(ch, ioctx, scmnd);
1970 }
1971
1972 /*
1973  * Called by the SCST core. Transmits the response buffer and status held in
1974  * 'scmnd'. Must not block.
1975  */
1976 static int srpt_xmit_response(struct scst_cmd *scmnd)
1977 {
1978         struct srpt_rdma_ch *ch;
1979         struct srpt_ioctx *ioctx;
1980         struct srp_rsp *srp_rsp;
1981         u64 tag;
1982         int ret = SCST_TGT_RES_SUCCESS;
1983         int dir;
1984         int status;
1985
1986         ioctx = scst_cmd_get_tgt_priv(scmnd);
1987         BUG_ON(!ioctx);
1988
1989         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
1990         BUG_ON(!ch);
1991
1992         tag = scst_cmd_get_tag(scmnd);
1993
1994         if (ch->state != RDMA_CHANNEL_LIVE) {
1995                 printk(KERN_ERR PFX
1996                        "%s: tag= %lld channel in bad state %d\n",
1997                        __func__, (unsigned long long)tag, ch->state);
1998
1999                 if (ch->state == RDMA_CHANNEL_DISCONNECTING)
2000                         ret = SCST_TGT_RES_FATAL_ERROR;
2001                 else if (ch->state == RDMA_CHANNEL_CONNECTING)
2002                         ret = SCST_TGT_RES_QUEUE_FULL;
2003
2004                 if (unlikely(scst_cmd_aborted(scmnd)))
2005                         goto out_aborted;
2006
2007                 goto out;
2008         }
2009
2010         dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
2011                                 MAX_MESSAGE_SIZE, DMA_TO_DEVICE);
2012
2013         srp_rsp = ioctx->buf;
2014
2015         if (unlikely(scst_cmd_aborted(scmnd))) {
2016                 printk(KERN_ERR PFX
2017                        "%s: tag= %lld has already been aborted\n",
2018                        __func__, (unsigned long long)tag);
2019                 goto out_aborted;
2020         }
2021
2022         dir = scst_cmd_get_data_direction(scmnd);
2023         status = scst_cmd_get_status(scmnd) & 0xff;
2024
2025         srpt_build_cmd_rsp(ch, ioctx, NO_SENSE, NO_ADD_SENSE, tag);
2026
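        /*
         * sense_data_len is manipulated in CPU byte order while the sense
         * data is copied and truncated, and is converted to big endian only
         * once the final length is known.
         */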
2027         if (SCST_SENSE_VALID(scst_cmd_get_sense_buffer(scmnd))) {
2028                 srp_rsp->sense_data_len = scst_cmd_get_sense_buffer_len(scmnd);
2029                 if (srp_rsp->sense_data_len >
2030                     (MAX_MESSAGE_SIZE - sizeof *srp_rsp))
2031                         srp_rsp->sense_data_len =
2032                             MAX_MESSAGE_SIZE - sizeof *srp_rsp;
2033
2034                 memcpy((u8 *) (srp_rsp + 1), scst_cmd_get_sense_buffer(scmnd),
2035                        srp_rsp->sense_data_len);
2036
2037                 srp_rsp->sense_data_len = cpu_to_be32(srp_rsp->sense_data_len);
2038                 srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
2039
2040                 if (!status)
2041                         status = SAM_STAT_CHECK_CONDITION;
2042         }
2043
2044         srp_rsp->status = status;
2045
2046         /* transfer read data if any */
2047         if (dir == SCST_DATA_READ && scst_cmd_get_resp_data_len(scmnd)) {
2048                 ret = srpt_xfer_data(ch, ioctx, scmnd);
2049                 if (ret != SCST_TGT_RES_SUCCESS) {
2050                         printk(KERN_ERR PFX
2051                                "%s: tag= %lld xfer_data failed\n",
2052                                __func__, (unsigned long long)tag);
2053                         goto out;
2054                 }
2055         }
2056
2057         if (srpt_post_send(ch, ioctx,
2058                            sizeof *srp_rsp +
2059                            be32_to_cpu(srp_rsp->sense_data_len))) {
2060                 printk(KERN_ERR PFX "%s: ch->state= %d tag= %lld\n",
2061                        __func__, ch->state,
2062                        (unsigned long long)tag);
2063                 ret = SCST_TGT_RES_FATAL_ERROR;
2064         }
2065
2066 out:
2067         return ret;
2068
2069 out_aborted:
2070         ret = SCST_TGT_RES_SUCCESS;
2071         scst_set_delivery_status(scmnd, SCST_CMD_DELIVERY_ABORTED);
2072         scst_tgt_cmd_done(scmnd, SCST_CONTEXT_SAME);
2073         goto out;
2074 }
2075
2076 /*
2077  * Called by the SCST core to inform ib_srpt that a received task management
2078  * function has been completed. Must not block.
2079  */
2080 static void srpt_tsk_mgmt_done(struct scst_mgmt_cmd *mcmnd)
2081 {
2082         struct srpt_rdma_ch *ch;
2083         struct srpt_mgmt_ioctx *mgmt_ioctx;
2084         struct srpt_ioctx *ioctx;
2085
2086         mgmt_ioctx = scst_mgmt_cmd_get_tgt_priv(mcmnd);
2087         BUG_ON(!mgmt_ioctx);
2088
2089         ch = mgmt_ioctx->ch;
2090         BUG_ON(!ch);
2091
2092         ioctx = mgmt_ioctx->ioctx;
2093         BUG_ON(!ioctx);
2094
2095         printk(KERN_WARNING PFX
2096                "%s: tsk_mgmt_done for tag= %lld status=%d\n",
2097                __func__, (unsigned long long)mgmt_ioctx->tag,
2098                scst_mgmt_cmd_get_status(mcmnd));
2099
2100         srpt_build_tskmgmt_rsp(ch, ioctx,
2101                                (scst_mgmt_cmd_get_status(mcmnd) ==
2102                                 SCST_MGMT_STATUS_SUCCESS) ?
2103                                SRP_TSK_MGMT_SUCCESS : SRP_TSK_MGMT_FAILED,
2104                                mgmt_ioctx->tag);
2105         srpt_post_send(ch, ioctx, sizeof(struct srp_rsp) + 4);
2106
2107         scst_mgmt_cmd_set_tgt_priv(mcmnd, NULL);
2108
2109         kfree(mgmt_ioctx);
2110 }
2111
2112 /*
2113  * Called by the SCST core to inform ib_srpt that the command 'scmnd' is about
2114  * to be freed. May be called in IRQ context.
2115  */
2116 static void srpt_on_free_cmd(struct scst_cmd *scmnd)
2117 {
2118         struct srpt_rdma_ch *ch;
2119         struct srpt_ioctx *ioctx;
2120
2121         ioctx = scst_cmd_get_tgt_priv(scmnd);
2122         BUG_ON(!ioctx);
2123
2124         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2125         BUG_ON(!ch);
2126
2127         spin_lock_irq(&ch->spinlock);
2128         list_del(&ioctx->scmnd_list);
2129         ch->active_scmnd_cnt--;
2130         spin_unlock_irq(&ch->spinlock);
2131
2132         srpt_reset_ioctx(ch, ioctx);
2133         scst_cmd_set_tgt_priv(scmnd, NULL);
2134 }
2135
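/*
 * Work queue callback that refreshes the information of an SRPT port. Two
 * prototypes are provided because the work queue callback signature changed
 * in kernel version 2.6.20.
 */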
2136 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2137 static void srpt_refresh_port_work(void *ctx)
2138 #else
2139 static void srpt_refresh_port_work(struct work_struct *work)
2140 #endif
2141 {
2142 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2143         struct srpt_port *sport = (struct srpt_port *)ctx;
2144 #else
2145         struct srpt_port *sport = container_of(work, struct srpt_port, work);
2146 #endif
2147
2148         srpt_refresh_port(sport);
2149 }
2150
2151 /*
2152  * Called by the SCST core to detect target adapters. Returns the number of
2153  * detected target adapters.
2154  */
2155 static int srpt_detect(struct scst_tgt_template *tp)
2156 {
2157         struct srpt_device *sdev;
2158         int count = 0;
2159
2160         list_for_each_entry(sdev, &srpt_devices, list)
2161                 ++count;
2162         return count;
2163 }
2164
2165 /*
2166  * Callback function called by the SCST core from scst_unregister() to free up
2167  * the resources associated with device scst_tgt.
2168  */
2169 static int srpt_release(struct scst_tgt *scst_tgt)
2170 {
2171         struct srpt_device *sdev;
2172         struct srpt_rdma_ch *ch, *tmp_ch;
2173         BUG_ON(!scst_tgt);
2174         sdev = scst_tgt_get_tgt_priv(scst_tgt);
2175 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
2176         WARN_ON(!sdev);
2177         if (!sdev)
2178                 return -ENODEV;
2179 #else
2180         if (WARN_ON(!sdev))
2181                 return -ENODEV;
2182 #endif
2183
2184         list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list)
2185                 srpt_release_channel(ch, 1);
2186
2187         srpt_unregister_mad_agent(sdev);
2188
2189         scst_tgt_set_tgt_priv(scst_tgt, NULL);
2190
2191         return 0;
2192 }
2193
2194 /*
2195  * Entry point for ib_srpt's kernel thread. This kernel thread is only created
2196  * when the module parameter 'thread' is not zero (the default is zero).
2197  * This thread processes the ioctx list srpt_thread.thread_ioctx_list.
2198  *
2199  * @pre thread != 0
2200  */
2201 static int srpt_ioctx_thread(void *arg)
2202 {
2203         struct srpt_ioctx *ioctx;
2204
2205         /* Hibernation / freezing of the SRPT kernel thread is not supported. */
2206         current->flags |= PF_NOFREEZE;
2207
2208         spin_lock_irq(&srpt_thread.thread_lock);
2209         while (!kthread_should_stop()) {
2210                 wait_queue_t wait;
2211                 init_waitqueue_entry(&wait, current);
2212
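                /*
                 * Sleep until at least one I/O context has been queued on
                 * srpt_thread.thread_ioctx_list. An open-coded wait loop is
                 * used here because thread_lock has to be dropped around
                 * schedule().
                 */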
2213                 if (!srpt_test_ioctx_list()) {
2214                         add_wait_queue_exclusive(&ioctx_list_waitQ, &wait);
2215
2216                         for (;;) {
2217                                 set_current_state(TASK_INTERRUPTIBLE);
2218                                 if (srpt_test_ioctx_list())
2219                                         break;
2220                                 spin_unlock_irq(&srpt_thread.thread_lock);
2221                                 schedule();
2222                                 spin_lock_irq(&srpt_thread.thread_lock);
2223                         }
2224                         set_current_state(TASK_RUNNING);
2225                         remove_wait_queue(&ioctx_list_waitQ, &wait);
2226                 }
2227
2228                 while (!list_empty(&srpt_thread.thread_ioctx_list)) {
2229                         ioctx = list_entry(srpt_thread.thread_ioctx_list.next,
2230                                            struct srpt_ioctx, comp_list);
2231
2232                         list_del(&ioctx->comp_list);
2233
2234                         spin_unlock_irq(&srpt_thread.thread_lock);
2235                         switch (ioctx->op) {
2236                         case IB_WC_SEND:
2237                                 srpt_handle_send_comp(ioctx->ch, ioctx,
2238                                         SCST_CONTEXT_DIRECT);
2239                                 break;
2240                         case IB_WC_RDMA_WRITE:
2241                         case IB_WC_RDMA_READ:
2242                                 srpt_handle_rdma_comp(ioctx->ch, ioctx);
2243                                 break;
2244                         case IB_WC_RECV:
2245                                 srpt_handle_new_iu(ioctx->ch, ioctx);
2246                                 break;
2247                         default:
2248                                 break;
2249                         }
2250                         spin_lock_irq(&srpt_thread.thread_lock);
2251                 }
2252         }
2253         spin_unlock_irq(&srpt_thread.thread_lock);
2254
2255         return 0;
2256 }
2257
2258 /* SCST target template for the SRP target implementation. */
2259 static struct scst_tgt_template srpt_template = {
2260         .name = DRV_NAME,
2261         .sg_tablesize = SRPT_DEF_SG_TABLESIZE,
2262         .xmit_response_atomic = 1,
2263         .rdy_to_xfer_atomic = 1,
2264         .no_proc_entry = 1,
2265         .detect = srpt_detect,
2266         .release = srpt_release,
2267         .xmit_response = srpt_xmit_response,
2268         .rdy_to_xfer = srpt_rdy_to_xfer,
2269         .on_free_cmd = srpt_on_free_cmd,
2270         .task_mgmt_fn_done = srpt_tsk_mgmt_done
2271 };
2272
2273 /*
2274  * The callback function srpt_release_class_dev() is called whenever a
2275  * device is removed from the /sys/class/infiniband_srpt device class.
2276  */
2277 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2278 static void srpt_release_class_dev(struct class_device *class_dev)
2279 #else
2280 static void srpt_release_class_dev(struct device *dev)
2281 #endif
2282 {
2283 }
2284
2285 static struct class srpt_class = {
2286         .name = "infiniband_srpt",
2287 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2288         .release = srpt_release_class_dev
2289 #else
2290         .dev_release = srpt_release_class_dev
2291 #endif
2292 };
2293
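/*
 * Implementation of the "login_info" sysfs attribute: prints, for each port
 * of the HCA, the identifiers (tid_ext, ioc_guid, pkey, dgid and service_id)
 * an initiator needs in order to log in to this target.
 */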
2294 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2295 static ssize_t show_login_info(struct class_device *class_dev, char *buf)
2296 #else
2297 static ssize_t show_login_info(struct device *dev,
2298                                struct device_attribute *attr, char *buf)
2299 #endif
2300 {
2301         struct srpt_device *sdev =
2302 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2303                 container_of(class_dev, struct srpt_device, class_dev);
2304 #else
2305                 container_of(dev, struct srpt_device, dev);
2306 #endif
2307         struct srpt_port *sport;
2308         int i;
2309         int len = 0;
2310
2311         for (i = 0; i < sdev->device->phys_port_cnt; i++) {
2312                 sport = &sdev->port[i];
2313
2314                 len += sprintf(buf + len,
2315                                "tid_ext=%016llx,ioc_guid=%016llx,pkey=ffff,"
2316                                "dgid=%04x%04x%04x%04x%04x%04x%04x%04x,"
2317                                "service_id=%016llx\n",
2318                                (unsigned long long) mellanox_ioc_guid,
2319                                (unsigned long long) mellanox_ioc_guid,
2320                                be16_to_cpu(((__be16 *) sport->gid.raw)[0]),
2321                                be16_to_cpu(((__be16 *) sport->gid.raw)[1]),
2322                                be16_to_cpu(((__be16 *) sport->gid.raw)[2]),
2323                                be16_to_cpu(((__be16 *) sport->gid.raw)[3]),
2324                                be16_to_cpu(((__be16 *) sport->gid.raw)[4]),
2325                                be16_to_cpu(((__be16 *) sport->gid.raw)[5]),
2326                                be16_to_cpu(((__be16 *) sport->gid.raw)[6]),
2327                                be16_to_cpu(((__be16 *) sport->gid.raw)[7]),
2328                                (unsigned long long) mellanox_ioc_guid);
2329         }
2330
2331         return len;
2332 }
2333
2334 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2335 static CLASS_DEVICE_ATTR(login_info, S_IRUGO, show_login_info, NULL);
2336 #else
2337 static DEVICE_ATTR(login_info, S_IRUGO, show_login_info, NULL);
2338 #endif
2339
2340 /*
2341  * Callback function called by the InfiniBand core when either an InfiniBand
2342  * device has been added or during the ib_register_client() call for each
2343  * registered InfiniBand device.
2344  */
2345 static void srpt_add_one(struct ib_device *device)
2346 {
2347         struct srpt_device *sdev;
2348         struct srpt_port *sport;
2349         struct ib_srq_init_attr srq_attr;
2350         int i;
2351
2352         sdev = kzalloc(sizeof *sdev, GFP_KERNEL);
2353         if (!sdev)
2354                 return;
2355
2356         sdev->device = device;
2357
2358 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2359         sdev->class_dev.class = &srpt_class;
2360         sdev->class_dev.dev = device->dma_device;
2361         snprintf(sdev->class_dev.class_id, BUS_ID_SIZE,
2362                  "srpt-%s", device->name);
2363 #else
2364         sdev->dev.class = &srpt_class;
2365         sdev->dev.parent = device->dma_device;
2366 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30)
2367         snprintf(sdev->dev.bus_id, BUS_ID_SIZE, "srpt-%s", device->name);
2368 #else
2369         snprintf(sdev->init_name, sizeof(sdev->init_name),
2370                  "srpt-%s", device->name);
2371         sdev->dev.init_name = sdev->init_name;
2372 #endif
2373 #endif
2374
2375 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2376         if (class_device_register(&sdev->class_dev))
2377                 goto free_dev;
2378         if (class_device_create_file(&sdev->class_dev,
2379                                      &class_device_attr_login_info))
2380                 goto err_dev;
2381 #else
2382         if (device_register(&sdev->dev))
2383                 goto free_dev;
2384         if (device_create_file(&sdev->dev, &dev_attr_login_info))
2385                 goto err_dev;
2386 #endif
2387
2388         if (ib_query_device(device, &sdev->dev_attr))
2389                 goto err_dev;
2390
2391         sdev->pd = ib_alloc_pd(device);
2392         if (IS_ERR(sdev->pd))
2393                 goto err_dev;
2394
2395         sdev->mr = ib_get_dma_mr(sdev->pd, IB_ACCESS_LOCAL_WRITE);
2396         if (IS_ERR(sdev->mr))
2397                 goto err_pd;
2398
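        /*
         * Create a single shared receive queue (SRQ) for this HCA so that
         * all channels can share one pool of receive buffers.
         */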
2399         srq_attr.event_handler = srpt_srq_event;
2400         srq_attr.srq_context = (void *)sdev;
2401         srq_attr.attr.max_wr = min(SRPT_SRQ_SIZE, sdev->dev_attr.max_srq_wr);
2402         srq_attr.attr.max_sge = 1;
2403         srq_attr.attr.srq_limit = 0;
2404
2405         sdev->srq = ib_create_srq(sdev->pd, &srq_attr);
2406         if (IS_ERR(sdev->srq))
2407                 goto err_mr;
2408
2409         printk(KERN_DEBUG PFX "%s: create SRQ #wr= %d max_allow=%d dev= %s\n",
2410                __func__, srq_attr.attr.max_wr,
2411                sdev->dev_attr.max_srq_wr, device->name);
2412
2413         if (!mellanox_ioc_guid)
2414                 mellanox_ioc_guid = be64_to_cpu(device->node_guid);
2415
2416         sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev);
2417         if (IS_ERR(sdev->cm_id))
2418                 goto err_srq;
2419
2420         /* print out target login information */
2421         printk(KERN_DEBUG PFX "Target login info: id_ext=%016llx,"
2422                 "ioc_guid=%016llx,pkey=ffff,service_id=%016llx\n",
2423                 (unsigned long long) mellanox_ioc_guid,
2424                 (unsigned long long) mellanox_ioc_guid,
2425                 (unsigned long long) mellanox_ioc_guid);
2426
2427         /*
2428          * We do not have a consistent service_id (i.e. the id_ext of the
2429          * target_id) to identify this target. We currently use the GUID of
2430          * the first HCA in the system as the service_id; the target_id will
2431          * therefore change if this HCA fails and is replaced by another one.
2432          */
2433         if (ib_cm_listen(sdev->cm_id, cpu_to_be64(mellanox_ioc_guid), 0, NULL))
2434                 goto err_cm;
2435
2436         INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device,
2437                               srpt_event_handler);
2438         if (ib_register_event_handler(&sdev->event_handler))
2439                 goto err_cm;
2440
2441         if (srpt_alloc_ioctx_ring(sdev))
2442                 goto err_event;
2443
2444         INIT_LIST_HEAD(&sdev->rch_list);
2445         spin_lock_init(&sdev->spinlock);
2446
2447         for (i = 0; i < SRPT_SRQ_SIZE; ++i)
2448                 srpt_post_recv(sdev, sdev->ioctx_ring[i]);
2449
2450         list_add_tail(&sdev->list, &srpt_devices);
2451
2452         ib_set_client_data(device, &srpt_client, sdev);
2453
2454         sdev->scst_tgt = scst_register(&srpt_template, NULL);
2455         if (!sdev->scst_tgt) {
2456                 printk(KERN_ERR PFX "SCST registration failed for %s.\n",
2457                         sdev->device->name);
2458                 goto err_ring;
2459         }
2460
2461         scst_tgt_set_tgt_priv(sdev->scst_tgt, sdev);
2462
2463         for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
2464                 sport = &sdev->port[i - 1];
2465                 sport->sdev = sdev;
2466                 sport->port = i;
2467 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2468                 INIT_WORK(&sport->work, srpt_refresh_port_work, sport);
2469 #else
2470                 INIT_WORK(&sport->work, srpt_refresh_port_work);
2471 #endif
2472                 if (srpt_refresh_port(sport)) {
2473                         printk(KERN_ERR PFX "MAD registration failed"
2474                                " for %s-%d.\n", sdev->device->name, i);
2475                         goto err_refresh_port;
2476                 }
2477         }
2478
2479         return;
2480
2481 err_refresh_port:
2482         scst_unregister(sdev->scst_tgt);
2483 err_ring:
2484         ib_set_client_data(device, &srpt_client, NULL);
2485         list_del(&sdev->list);
2486         srpt_free_ioctx_ring(sdev);
2487 err_event:
2488         ib_unregister_event_handler(&sdev->event_handler);
2489 err_cm:
2490         ib_destroy_cm_id(sdev->cm_id);
2491 err_srq:
2492         ib_destroy_srq(sdev->srq);
2493 err_mr:
2494         ib_dereg_mr(sdev->mr);
2495 err_pd:
2496         ib_dealloc_pd(sdev->pd);
2497 err_dev:
2498 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2499         class_device_unregister(&sdev->class_dev);
2500 #else
2501         device_unregister(&sdev->dev);
2502 #endif
2503 free_dev:
2504         kfree(sdev);
2505 }
2506
2507 /*
2508  * Callback function called by the InfiniBand core when either an InfiniBand
2509  * device has been removed or during the ib_unregister_client() call for each
2510  * registered InfiniBand device.
2511  */
2512 static void srpt_remove_one(struct ib_device *device)
2513 {
2514         struct srpt_device *sdev;
2515
2516         sdev = ib_get_client_data(device, &srpt_client);
2517 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
2518         WARN_ON(!sdev);
2519         if (!sdev)
2520                 return;
2521 #else
2522         if (WARN_ON(!sdev))
2523                 return;
2524 #endif
2525
2526         scst_unregister(sdev->scst_tgt);
2527         sdev->scst_tgt = NULL;
2528
2529         ib_unregister_event_handler(&sdev->event_handler);
2530         ib_destroy_cm_id(sdev->cm_id);
2531         ib_destroy_srq(sdev->srq);
2532         ib_dereg_mr(sdev->mr);
2533         ib_dealloc_pd(sdev->pd);
2534 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2535         class_device_unregister(&sdev->class_dev);
2536 #else
2537         device_unregister(&sdev->dev);
2538 #endif
2539
2540         srpt_free_ioctx_ring(sdev);
2541         list_del(&sdev->list);
2542         kfree(sdev);
2543 }
2544
2545 /*
2546  * Module initialization.
2547  *
2548  * Note: since ib_register_client() registers callback functions, and since at
2549  * least one of these callback functions (srpt_add_one()) calls SCST functions,
2550  * the SCST target template must be registered before ib_register_client() is
2551  * called.
2552  */
2553 static int __init srpt_init_module(void)
2554 {
2555         int ret;
2556
2557         INIT_LIST_HEAD(&srpt_devices);
2558
2559         ret = class_register(&srpt_class);
2560         if (ret) {
2561                 printk(KERN_ERR PFX "couldn't register class infiniband_srpt\n");
2562                 return ret;
2563         }
2564
2565         ret = scst_register_target_template(&srpt_template);
2566         if (ret < 0) {
2567                 printk(KERN_ERR PFX "couldn't register with scst\n");
2568                 ret = -ENODEV;
2569                 goto class_out;
2570         }
2571
2572         ret = ib_register_client(&srpt_client);
2573         if (ret) {
2574                 printk(KERN_ERR PFX "couldn't register IB client\n");
2575                 goto scst_out;
2576         }
2577
2578         if (thread) {
2579                 spin_lock_init(&srpt_thread.thread_lock);
2580                 INIT_LIST_HEAD(&srpt_thread.thread_ioctx_list);
2581                 srpt_thread.thread = kthread_run(srpt_ioctx_thread,
2582                                                  NULL, "srpt_thread");
2583                 if (IS_ERR(srpt_thread.thread)) {
2584                         srpt_thread.thread = NULL;
2585                         thread = 0;
2586                 }
2587         }
2588
2589         return 0;
2590
2591 scst_out:
2592         scst_unregister_target_template(&srpt_template);
2593 class_out:
2594         class_unregister(&srpt_class);
2595         return ret;
2596 }
2597
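/*
 * Module cleanup: stops the kernel thread if it was started and undoes the
 * registrations performed by srpt_init_module() in reverse order.
 */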
2598 static void __exit srpt_cleanup_module(void)
2599 {
2600         if (srpt_thread.thread)
2601                 kthread_stop(srpt_thread.thread);
2602         ib_unregister_client(&srpt_client);
2603         scst_unregister_target_template(&srpt_template);
2604         class_unregister(&srpt_class);
2605 }
2606
2607 module_init(srpt_init_module);
2608 module_exit(srpt_cleanup_module);