/*
 * Copyright (c) 2006 - 2009 Mellanox Technology Inc.  All rights reserved.
 * Copyright (C) 2008 Vladislav Bolkhovitin <vst@vlnb.net>
 * Copyright (C) 2008 - 2009 Bart Van Assche <bart.vanassche@gmail.com>
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/kthread.h>

#include <asm/atomic.h>

#include "ib_srpt.h"

/* Name of this kernel module. */
#define DRV_NAME                "ib_srpt"
/* Prefix for printk() kernel messages. */
#define PFX                     DRV_NAME ": "
#define DRV_VERSION             "1.0.1"
#define DRV_RELDATE             "July 10, 2008"

#define MELLANOX_SRPT_ID_STRING "Mellanox OFED SRP target"

MODULE_AUTHOR("Vu Pham");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol target "
                   "v" DRV_VERSION " (" DRV_RELDATE ")");
MODULE_LICENSE("Dual BSD/GPL");

struct srpt_thread {
        /* Protects thread_ioctx_list. */
        spinlock_t thread_lock;
        /* I/O contexts to be processed by the kernel thread. */
        struct list_head thread_ioctx_list;
        /* SRPT kernel thread. */
        struct task_struct *thread;
};

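/*
 * GUID of the I/O controller: reported to initiators via device management
 * MADs and used as this target's SRP target port identifier.
 */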
static u64 mellanox_ioc_guid;
/* List of srpt_device structures. */
static struct list_head srpt_devices;
static int thread;
static struct srpt_thread srpt_thread;
static DECLARE_WAIT_QUEUE_HEAD(ioctx_list_waitQ);

module_param(thread, int, 0444);
MODULE_PARM_DESC(thread,
                 "Execute ioctx processing in kernel thread context. Default"
                 " is 0, i.e. process in soft IRQ context where possible.");

static void srpt_add_one(struct ib_device *device);
static void srpt_remove_one(struct ib_device *device);
static int srpt_disconnect_channel(struct srpt_rdma_ch *ch, int dreq);
static void srpt_unregister_mad_agent(struct srpt_device *sdev);

static struct ib_client srpt_client = {
        .name = DRV_NAME,
        .add = srpt_add_one,
        .remove = srpt_remove_one
};

/*
 * Callback function called by the InfiniBand core when an asynchronous IB
 * event occurs. This callback may occur in interrupt context. See also
 * section 11.5.2, Set Asynchronous Event Handler in the InfiniBand
 * Architecture Specification.
 */
static void srpt_event_handler(struct ib_event_handler *handler,
                               struct ib_event *event)
{
        struct srpt_device *sdev =
            ib_get_client_data(event->device, &srpt_client);
        struct srpt_port *sport;

        if (!sdev || sdev->device != event->device)
                return;

        printk(KERN_WARNING PFX "ASYNC event= %d on device= %s\n",
                event->event, sdev->device->name);

        switch (event->event) {
        case IB_EVENT_PORT_ERR:
                if (event->element.port_num <= sdev->device->phys_port_cnt) {
                        sport = &sdev->port[event->element.port_num - 1];
                        sport->lid = 0;
                        sport->sm_lid = 0;
                }
                break;
        case IB_EVENT_PORT_ACTIVE:
        case IB_EVENT_LID_CHANGE:
        case IB_EVENT_PKEY_CHANGE:
        case IB_EVENT_SM_CHANGE:
        case IB_EVENT_CLIENT_REREGISTER:
                if (event->element.port_num <= sdev->device->phys_port_cnt) {
                        sport = &sdev->port[event->element.port_num - 1];
                        if (!sport->lid && !sport->sm_lid)
                                schedule_work(&sport->work);
                }
                break;
        default:
                break;
        }

}

/*
 * Callback function called by the InfiniBand core for SRQ (shared receive
 * queue) events.
 */
static void srpt_srq_event(struct ib_event *event, void *ctx)
{
        printk(KERN_WARNING PFX "SRQ event %d\n", event->event);
}

/*
 * Callback function called by the InfiniBand core for QP (queue pair) events.
 */
static void srpt_qp_event(struct ib_event *event, void *ctx)
{
        struct srpt_rdma_ch *ch = ctx;

        printk(KERN_WARNING PFX
               "QP event %d on cm_id=%p sess_name=%s state=%d\n",
               event->event, ch->cm_id, ch->sess_name, ch->state);

        switch (event->event) {
        case IB_EVENT_COMM_EST:
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20) || defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
                ib_cm_notify(ch->cm_id, event->event);
#else
                /* Vanilla 2.6.19 kernel (or older) without OFED. */
                printk(KERN_ERR PFX "don't know how to perform ib_cm_notify()"
                        " on a vanilla 2.6.19 (or older) kernel\n");
#endif
                break;
        case IB_EVENT_QP_LAST_WQE_REACHED:
                if (ch->state == RDMA_CHANNEL_LIVE) {
                        printk(KERN_WARNING PFX
                               "Schedule CM_DISCONNECT_WORK\n");
                        srpt_disconnect_channel(ch, 1);
                }
                break;
        default:
                break;
        }
}

/*
 * Helper function for filling in an InfiniBand IOUnitInfo structure. Copies
 * the lowest four bits of value into element slot of the array of four-bit
 * elements called c_list (controller list). The index slot is one-based.
 *
 * @pre 1 <= slot && 0 <= value && value < 16
 */
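/*
 * Example: slot 3 maps to the high nibble of c_list[1] and slot 4 to the low
 * nibble of c_list[1], since id = (slot - 1) / 2 and odd slots select the
 * high nibble.
 */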
static void srpt_set_ioc(u8 *c_list, u32 slot, u8 value)
{
        u16 id;
        u8 tmp;

        id = (slot - 1) / 2;
        if (slot & 0x1) {
                tmp = c_list[id] & 0xf;
                c_list[id] = (value << 4) | tmp;
        } else {
                tmp = c_list[id] & 0xf0;
                c_list[id] = (value & 0xf) | tmp;
        }
}

/*
 * Write InfiniBand ClassPortInfo to mad. See also section 16.3.3.1
 * ClassPortInfo in the InfiniBand Architecture Specification.
 */
static void srpt_get_class_port_info(struct ib_dm_mad *mad)
{
        struct ib_class_port_info *cif;

        cif = (struct ib_class_port_info *)mad->data;
        memset(cif, 0, sizeof *cif);
        cif->base_version = 1;
        cif->class_version = 1;
        cif->resp_time_value = 20;

        mad->mad_hdr.status = 0;
}

/*
 * Write IOUnitInfo to mad. See also section 16.3.3.3 IOUnitInfo in the
 * InfiniBand Architecture Specification. See also section B.7,
 * table B.6 in the T10 SRP r16a document.
 */
static void srpt_get_iou(struct ib_dm_mad *mad)
{
        struct ib_dm_iou_info *ioui;
        u8 slot;
        int i;

        ioui = (struct ib_dm_iou_info *)mad->data;
        ioui->change_id = 1;
        ioui->max_controllers = 16;

        /* set present for slot 1 and empty for the rest */
        srpt_set_ioc(ioui->controller_list, 1, 1);
        for (i = 1, slot = 2; i < 16; i++, slot++)
                srpt_set_ioc(ioui->controller_list, slot, 0);

        mad->mad_hdr.status = 0;
}

/*
 * Write IOControllerProfile to mad for I/O controller (sdev, slot). See also
 * section 16.3.3.4 IOControllerProfile in the InfiniBand Architecture
 * Specification. See also section B.7, table B.7 in the T10 SRP r16a
 * document.
 */
static void srpt_get_ioc(struct srpt_device *sdev, u32 slot,
                         struct ib_dm_mad *mad)
{
        struct ib_dm_ioc_profile *iocp;

        iocp = (struct ib_dm_ioc_profile *)mad->data;

        if (!slot || slot > 16) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
                return;
        }

        if (slot > 2) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
                return;
        }

        memset(iocp, 0, sizeof *iocp);
        strcpy(iocp->id_string, MELLANOX_SRPT_ID_STRING);
        iocp->guid = cpu_to_be64(mellanox_ioc_guid);
        iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
        iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id);
        iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver);
        iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
        iocp->subsys_device_id = 0x0;
        iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
        iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
        iocp->protocol = cpu_to_be16(SRP_PROTOCOL);
        iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION);
        iocp->send_queue_depth = cpu_to_be16(SRPT_SRQ_SIZE);
        iocp->rdma_read_depth = 4;
        iocp->send_size = cpu_to_be32(MAX_MESSAGE_SIZE);
        iocp->rdma_size = cpu_to_be32(MAX_RDMA_SIZE);
        iocp->num_svc_entries = 1;
        iocp->op_cap_mask = SRP_SEND_TO_IOC | SRP_SEND_FROM_IOC |
            SRP_RDMA_READ_FROM_IOC | SRP_RDMA_WRITE_FROM_IOC;

        mad->mad_hdr.status = 0;
}

/*
 * Device management: write ServiceEntries to mad for the given slot. See also
 * section 16.3.3.5 ServiceEntries in the InfiniBand Architecture
 * Specification. See also section B.7, table B.8 in the T10 SRP r16a document.
 */
static void srpt_get_svc_entries(u16 slot, u8 hi, u8 lo, struct ib_dm_mad *mad)
{
        struct ib_dm_svc_entries *svc_entries;

        if (!slot || slot > 16) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
                return;
        }

        if (slot > 2 || lo > hi || hi > 1) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
                return;
        }

        svc_entries = (struct ib_dm_svc_entries *)mad->data;
        memset(svc_entries, 0, sizeof *svc_entries);
        svc_entries->service_entries[0].id = cpu_to_be64(mellanox_ioc_guid);
        sprintf(svc_entries->service_entries[0].name, "%s%016llx",
                SRP_SERVICE_NAME_PREFIX, (unsigned long long)mellanox_ioc_guid);

        mad->mad_hdr.status = 0;
}

/*
 * Actual processing of a MAD *rq_mad received through source port *sp
 * (MAD = InfiniBand management datagram). The response to be sent back is
 * written to *rsp_mad.
 */
static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
                                 struct ib_dm_mad *rsp_mad)
{
        u16 attr_id;
        u32 slot;
        u8 hi, lo;

        attr_id = be16_to_cpu(rq_mad->mad_hdr.attr_id);
        switch (attr_id) {
        case DM_ATTR_CLASS_PORT_INFO:
                srpt_get_class_port_info(rsp_mad);
                break;
        case DM_ATTR_IOU_INFO:
                srpt_get_iou(rsp_mad);
                break;
        case DM_ATTR_IOC_PROFILE:
                slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
                srpt_get_ioc(sp->sdev, slot, rsp_mad);
                break;
        case DM_ATTR_SVC_ENTRIES:
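                /*
                 * The attribute modifier packs the controller slot in bits
                 * 31:16 and the requested range of service entries (lo..hi)
                 * in bits 7:0 and 15:8 respectively.
                 */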
                slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
                hi = (u8) ((slot >> 8) & 0xff);
                lo = (u8) (slot & 0xff);
                slot = (u16) ((slot >> 16) & 0xffff);
                srpt_get_svc_entries(slot, hi, lo, rsp_mad);
                break;
        default:
                rsp_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
                break;
        }
}

/*
 * Callback function that is called by the InfiniBand core after transmission
 * of a MAD. (MAD = management datagram; AH = address handle.)
 */
static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
                                  struct ib_mad_send_wc *mad_wc)
{
        ib_destroy_ah(mad_wc->send_buf->ah);
        ib_free_send_mad(mad_wc->send_buf);
}

/*
 * Callback function that is called by the InfiniBand core after reception of
 * a MAD (management datagram).
 */
static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
                                  struct ib_mad_recv_wc *mad_wc)
{
        struct srpt_port *sport = (struct srpt_port *)mad_agent->context;
        struct ib_ah *ah;
        struct ib_mad_send_buf *rsp;
        struct ib_dm_mad *dm_mad;

        if (!mad_wc || !mad_wc->recv_buf.mad)
                return;

        ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc,
                                  mad_wc->recv_buf.grh, mad_agent->port_num);
        if (IS_ERR(ah))
                goto err;

        BUILD_BUG_ON(offsetof(struct ib_dm_mad, data) != IB_MGMT_DEVICE_HDR);

        rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp,
                                 mad_wc->wc->pkey_index, 0,
                                 IB_MGMT_DEVICE_HDR, IB_MGMT_DEVICE_DATA,
                                 GFP_KERNEL);
        if (IS_ERR(rsp))
                goto err_rsp;

        rsp->ah = ah;

        dm_mad = rsp->mad;
        memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof *dm_mad);
        dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
        dm_mad->mad_hdr.status = 0;

        switch (mad_wc->recv_buf.mad->mad_hdr.method) {
        case IB_MGMT_METHOD_GET:
                srpt_mgmt_method_get(sport, mad_wc->recv_buf.mad, dm_mad);
                break;
        case IB_MGMT_METHOD_SET:
                dm_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
                break;
        default:
                dm_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD);
                break;
        }

        if (!ib_post_send_mad(rsp, NULL)) {
                ib_free_recv_mad(mad_wc);
                /* will destroy_ah & free_send_mad in send completion */
                return;
        }

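        /* Posting failed: release the response, the AH and the received MAD. */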
        ib_free_send_mad(rsp);

err_rsp:
        ib_destroy_ah(ah);
err:
        ib_free_recv_mad(mad_wc);
}

/*
 * Enable InfiniBand management datagram processing, update the cached sm_lid,
 * lid and gid values, and register a callback function for processing MADs
 * on the specified port. It is safe to call this function more than once for
 * the same port.
 */
static int srpt_refresh_port(struct srpt_port *sport)
{
        struct ib_mad_reg_req reg_req;
        struct ib_port_modify port_modify;
        struct ib_port_attr port_attr;
        int ret;

        memset(&port_modify, 0, sizeof port_modify);
        port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
        port_modify.clr_port_cap_mask = 0;

        ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
        if (ret)
                goto err_mod_port;

        ret = ib_query_port(sport->sdev->device, sport->port, &port_attr);
        if (ret)
                goto err_query_port;

        sport->sm_lid = port_attr.sm_lid;
        sport->lid = port_attr.lid;

        ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid);
        if (ret)
                goto err_query_port;

        if (!sport->mad_agent) {
                memset(&reg_req, 0, sizeof reg_req);
                reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
                reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION;
                set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
                set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);

                sport->mad_agent = ib_register_mad_agent(sport->sdev->device,
                                                         sport->port,
                                                         IB_QPT_GSI,
                                                         &reg_req, 0,
                                                         srpt_mad_send_handler,
                                                         srpt_mad_recv_handler,
                                                         sport);
                if (IS_ERR(sport->mad_agent)) {
                        ret = PTR_ERR(sport->mad_agent);
                        sport->mad_agent = NULL;
                        goto err_query_port;
                }
        }

        return 0;

err_query_port:

        port_modify.set_port_cap_mask = 0;
        port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
        ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);

err_mod_port:

        return ret;
}

/*
 * Unregister the callback function for processing MADs and disable MAD
 * processing for all ports of the specified device. It is safe to call this
 * function more than once for the same device.
 */
static void srpt_unregister_mad_agent(struct srpt_device *sdev)
{
        struct ib_port_modify port_modify = {
                .clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP,
        };
        struct srpt_port *sport;
        int i;

        for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
                sport = &sdev->port[i - 1];
                WARN_ON(sport->port != i);
                if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0)
                        printk(KERN_ERR PFX "disabling MAD processing"
                               " failed.\n");
                if (sport->mad_agent) {
                        ib_unregister_mad_agent(sport->mad_agent);
                        sport->mad_agent = NULL;
                }
        }
}

/*
 * Allocate and initialize an SRPT I/O context structure.
 */
static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev)
{
        struct srpt_ioctx *ioctx;

        ioctx = kmalloc(sizeof *ioctx, GFP_KERNEL);
        if (!ioctx)
                goto out;

        ioctx->buf = kzalloc(MAX_MESSAGE_SIZE, GFP_KERNEL);
        if (!ioctx->buf)
                goto out_free_ioctx;

        ioctx->dma = dma_map_single(sdev->device->dma_device, ioctx->buf,
                                    MAX_MESSAGE_SIZE, DMA_BIDIRECTIONAL);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)
        if (dma_mapping_error(sdev->device->dma_device, ioctx->dma))
#else
        if (dma_mapping_error(ioctx->dma))
#endif
                goto out_free_buf;

        return ioctx;

out_free_buf:
        kfree(ioctx->buf);
out_free_ioctx:
        kfree(ioctx);
out:
        return NULL;
}

/*
 * Deallocate an SRPT I/O context structure.
 */
static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
{
        if (!ioctx)
                return;

        dma_unmap_single(sdev->device->dma_device, ioctx->dma,
                         MAX_MESSAGE_SIZE, DMA_BIDIRECTIONAL);
        kfree(ioctx->buf);
        kfree(ioctx);
}

/*
 * Associate a ring of SRPT I/O context structures with the specified device.
 */
static int srpt_alloc_ioctx_ring(struct srpt_device *sdev)
{
        int i;

        for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
                sdev->ioctx_ring[i] = srpt_alloc_ioctx(sdev);

                if (!sdev->ioctx_ring[i])
                        goto err;

                sdev->ioctx_ring[i]->index = i;
        }

        return 0;

err:
        while (--i >= 0) {
                srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
                sdev->ioctx_ring[i] = NULL;
        }
        return -ENOMEM;
}

/* Free the ring of SRPT I/O context structures. */
static void srpt_free_ioctx_ring(struct srpt_device *sdev)
{
        int i;

        for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
                srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
                sdev->ioctx_ring[i] = NULL;
        }
}

/*
 * Post a receive request on the SRQ (shared receive queue) of InfiniBand
 * device 'sdev'.
 */
static int srpt_post_recv(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
{
        struct ib_sge list;
        struct ib_recv_wr wr, *bad_wr;

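        /*
         * Tag receive work requests with SRPT_OP_RECV so that receive and
         * send completions can be told apart in srpt_completion().
         */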
        wr.wr_id = ioctx->index | SRPT_OP_RECV;

        list.addr = ioctx->dma;
        list.length = MAX_MESSAGE_SIZE;
        list.lkey = sdev->mr->lkey;

        wr.next = NULL;
        wr.sg_list = &list;
        wr.num_sge = 1;

        return ib_post_srq_recv(sdev->srq, &wr, &bad_wr);
}

/*
 * Post a send request on the SRPT RDMA channel 'ch'.
 */
static int srpt_post_send(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
                          int len)
{
        struct ib_sge list;
        struct ib_send_wr wr, *bad_wr;
        struct srpt_device *sdev = ch->sport->sdev;

        dma_sync_single_for_device(sdev->device->dma_device, ioctx->dma,
                                   MAX_MESSAGE_SIZE, DMA_TO_DEVICE);

        list.addr = ioctx->dma;
        list.length = len;
        list.lkey = sdev->mr->lkey;

        wr.next = NULL;
        wr.wr_id = ioctx->index;
        wr.sg_list = &list;
        wr.num_sge = 1;
        wr.opcode = IB_WR_SEND;
        wr.send_flags = IB_SEND_SIGNALED;

        return ib_post_send(ch->qp, &wr, &bad_wr);
}

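/*
 * Extract the data buffer descriptor(s) from SRP_CMD *srp_cmd into *ioctx.
 * For an indirect descriptor list that is not fully contained in the command
 * IU, *ind is set to one and no buffers are kept; the caller rejects such
 * commands.
 */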
static int srpt_get_desc_tbl(struct srpt_ioctx *ioctx, struct srp_cmd *srp_cmd,
                             int *ind)
{
        struct srp_indirect_buf *idb;
        struct srp_direct_buf *db;

        *ind = 0;
        if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) ||
            ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) {
                ioctx->n_rbuf = 1;
                ioctx->rbufs = &ioctx->single_rbuf;

                db = (void *)srp_cmd->add_data;
                memcpy(ioctx->rbufs, db, sizeof *db);
                ioctx->data_len = be32_to_cpu(db->len);
        } else {
                idb = (void *)srp_cmd->add_data;

                ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof *db;

                if (ioctx->n_rbuf >
                    (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
                        *ind = 1;
                        ioctx->n_rbuf = 0;
                        goto out;
                }

                if (ioctx->n_rbuf == 1)
                        ioctx->rbufs = &ioctx->single_rbuf;
                else
                        ioctx->rbufs =
                                kmalloc(ioctx->n_rbuf * sizeof *db, GFP_ATOMIC);
                if (!ioctx->rbufs) {
                        ioctx->n_rbuf = 0;
                        return -ENOMEM;
                }

                db = idb->desc_list;
                memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof *db);
                ioctx->data_len = be32_to_cpu(idb->len);
        }
out:
        return 0;
}

/*
 * Modify the attributes of queue pair 'qp': allow local write, remote read,
 * and remote write. Also transition 'qp' to state IB_QPS_INIT.
 */
static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
{
        struct ib_qp_attr *attr;
        int ret;

        attr = kzalloc(sizeof *attr, GFP_KERNEL);
        if (!attr)
                return -ENOMEM;

        attr->qp_state = IB_QPS_INIT;
        attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
            IB_ACCESS_REMOTE_WRITE;
        attr->port_num = ch->sport->port;
        attr->pkey_index = 0;

        ret = ib_modify_qp(qp, attr,
                           IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PORT |
                           IB_QP_PKEY_INDEX);

        kfree(attr);
        return ret;
}

static int srpt_ch_qp_rtr_rts(struct srpt_rdma_ch *ch, struct ib_qp *qp,
                              enum ib_qp_state qp_state)
{
        struct ib_qp_attr *qp_attr;
        int attr_mask;
        int ret;

        qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
        if (!qp_attr)
                return -ENOMEM;

        qp_attr->qp_state = qp_state;
        ret = ib_cm_init_qp_attr(ch->cm_id, qp_attr, &attr_mask);
        if (ret)
                goto out;

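        /*
         * Use a fixed RDMA read/atomic depth of four, matching the
         * responder_resources and initiator_depth values sent in the CM REP
         * message by srpt_cm_req_recv().
         */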
        if (qp_state == IB_QPS_RTR)
                qp_attr->max_dest_rd_atomic = 4;
        else
                qp_attr->max_rd_atomic = 4;

        ret = ib_modify_qp(qp, qp_attr, attr_mask);

out:
        kfree(qp_attr);
        return ret;
}

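/*
 * Release the resources owned by I/O context *ioctx and repost it as a
 * receive request on the SRQ.
 */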
static void srpt_reset_ioctx(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
{
        int i;

        if (ioctx->n_rdma_ius > 0 && ioctx->rdma_ius) {
                struct rdma_iu *riu = ioctx->rdma_ius;

                for (i = 0; i < ioctx->n_rdma_ius; ++i, ++riu)
                        kfree(riu->sge);
                kfree(ioctx->rdma_ius);
        }

        if (ioctx->n_rbuf > 1)
                kfree(ioctx->rbufs);

        if (srpt_post_recv(ch->sport->sdev, ioctx))
                /* we should queue it back to free_ioctx queue */
                printk(KERN_ERR PFX "SRQ post_recv failed - this is serious\n");
        else
                atomic_inc(&ch->req_lim_delta);
}

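/* Handle a work completion that finished with an error status. */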
static void srpt_handle_err_comp(struct srpt_rdma_ch *ch, struct ib_wc *wc)
{
        struct srpt_ioctx *ioctx;
        struct srpt_device *sdev = ch->sport->sdev;
        scst_data_direction dir = SCST_DATA_NONE;

        if (wc->wr_id & SRPT_OP_RECV) {
                ioctx = sdev->ioctx_ring[wc->wr_id & ~SRPT_OP_RECV];
                printk(KERN_ERR PFX "This is serious - SRQ is in bad state\n");
        } else {
                ioctx = sdev->ioctx_ring[wc->wr_id];

                if (ioctx->scmnd) {
                        struct scst_cmd *scmnd = ioctx->scmnd;

                        dir = scst_cmd_get_data_direction(scmnd);

                        if (dir == SCST_DATA_NONE)
                                scst_tgt_cmd_done(scmnd,
                                        scst_estimate_context());
                        else {
                                dma_unmap_sg(sdev->device->dma_device,
                                             scst_cmd_get_sg(scmnd),
                                             scst_cmd_get_sg_cnt(scmnd),
                                             scst_to_tgt_dma_dir(dir));

                                if (scmnd->state == SCST_CMD_STATE_DATA_WAIT)
                                        scst_rx_data(scmnd,
                                                     SCST_RX_STATUS_ERROR,
                                                     SCST_CONTEXT_THREAD);
                                else if (scmnd->state == SCST_CMD_STATE_XMIT_WAIT)
                                        scst_tgt_cmd_done(scmnd,
                                                scst_estimate_context());
                        }
                } else
                        srpt_reset_ioctx(ch, ioctx);
        }
}

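/*
 * Process a send completion: report command completion to SCST and unmap the
 * command's data buffers.
 */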
static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
                                  struct srpt_ioctx *ioctx,
                                  enum scst_exec_context context)
{
        if (ioctx->scmnd) {
                scst_data_direction dir =
                        scst_cmd_get_data_direction(ioctx->scmnd);

                if (dir != SCST_DATA_NONE)
                        dma_unmap_sg(ch->sport->sdev->device->dma_device,
                                     scst_cmd_get_sg(ioctx->scmnd),
                                     scst_cmd_get_sg_cnt(ioctx->scmnd),
                                     scst_to_tgt_dma_dir(dir));

                scst_tgt_cmd_done(ioctx->scmnd, context);
        } else
                srpt_reset_ioctx(ch, ioctx);
}

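/*
 * Process an RDMA completion: for a write (data-out) command, tell SCST that
 * the data transfer from the initiator has finished.
 */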
static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
                                  struct srpt_ioctx *ioctx)
{
        if (!ioctx->scmnd) {
                srpt_reset_ioctx(ch, ioctx);
                return;
        }

        if (scst_cmd_get_data_direction(ioctx->scmnd) == SCST_DATA_WRITE)
                scst_rx_data(ioctx->scmnd, SCST_RX_STATUS_SUCCESS,
                        scst_estimate_context());
}

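/*
 * Build an SRP_RSP response IU in ioctx->buf. The response carries the
 * current request limit delta and, if s_key != NO_SENSE, fixed-format sense
 * data with the given sense key and additional sense code.
 */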
static void srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
                               struct srpt_ioctx *ioctx, u8 s_key, u8 s_code,
                               u64 tag)
{
        struct srp_rsp *srp_rsp;
        struct sense_data *sense;
        int limit_delta;

        srp_rsp = ioctx->buf;
        memset(srp_rsp, 0, sizeof *srp_rsp);

        limit_delta = atomic_read(&ch->req_lim_delta);
        atomic_sub(limit_delta, &ch->req_lim_delta);

        srp_rsp->opcode = SRP_RSP;
        srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
        srp_rsp->tag = tag;

        if (s_key != NO_SENSE) {
                srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
                srp_rsp->status = SAM_STAT_CHECK_CONDITION;
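                /*
                 * Round the sense data length up to a multiple of four; this
                 * only works because sizeof(*sense) is even.
                 */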
                srp_rsp->sense_data_len =
                    cpu_to_be32(sizeof *sense + (sizeof *sense % 4));

                sense = (struct sense_data *)(srp_rsp + 1);
                sense->err_code = 0x70;
                sense->key = s_key;
                sense->asc_ascq = s_code;
        }
}

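/*
 * Build an SRP_RSP response IU for a task management function; the response
 * data field carrying the response code is only filled in when the function
 * did not succeed.
 */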
static void srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
                                   struct srpt_ioctx *ioctx, u8 rsp_code,
                                   u64 tag)
{
        struct srp_rsp *srp_rsp;
        int limit_delta;

        dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
                                MAX_MESSAGE_SIZE, DMA_TO_DEVICE);

        srp_rsp = ioctx->buf;
        memset(srp_rsp, 0, sizeof *srp_rsp);

        limit_delta = atomic_read(&ch->req_lim_delta);
        atomic_sub(limit_delta, &ch->req_lim_delta);

        srp_rsp->opcode = SRP_RSP;
        srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
        srp_rsp->tag = tag;

        if (rsp_code != SRP_TSK_MGMT_SUCCESS) {
                srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID;
                srp_rsp->resp_data_len = cpu_to_be32(4);
                srp_rsp->data[3] = rsp_code;
        }
}

/*
 * Process SRP_CMD.
 */
static int srpt_handle_cmd(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
{
        struct scst_cmd *scmnd = NULL;
        struct srp_cmd *srp_cmd = NULL;
        scst_data_direction dir = SCST_DATA_NONE;
        int indirect_desc = 0;
        int ret;
        unsigned long flags;

        srp_cmd = ioctx->buf;

        if (srp_cmd->buf_fmt) {
                ret = srpt_get_desc_tbl(ioctx, srp_cmd, &indirect_desc);
                if (ret) {
                        srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
                                           NO_ADD_SENSE, srp_cmd->tag);
                        ((struct srp_rsp *)ioctx->buf)->status =
                                        SAM_STAT_TASK_SET_FULL;
                        goto send_rsp;
                }

                if (indirect_desc) {
                        srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
                                           NO_ADD_SENSE, srp_cmd->tag);
                        ((struct srp_rsp *)ioctx->buf)->status =
                                        SAM_STAT_TASK_SET_FULL;
                        goto send_rsp;
                }

                if (srp_cmd->buf_fmt & 0xf)
                        dir = SCST_DATA_READ;
                else if (srp_cmd->buf_fmt >> 4)
                        dir = SCST_DATA_WRITE;
                else
                        dir = SCST_DATA_NONE;
        } else
                dir = SCST_DATA_NONE;

        scmnd = scst_rx_cmd(ch->scst_sess, (u8 *) &srp_cmd->lun,
                            sizeof srp_cmd->lun, srp_cmd->cdb, 16,
                            thread ? SCST_NON_ATOMIC : SCST_ATOMIC);
        if (!scmnd) {
                srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
                                   NO_ADD_SENSE, srp_cmd->tag);
                ((struct srp_rsp *)ioctx->buf)->status =
                        SAM_STAT_TASK_SET_FULL;
                goto send_rsp;
        }

        ioctx->scmnd = scmnd;

        switch (srp_cmd->task_attr) {
        case SRP_CMD_HEAD_OF_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_HEAD_OF_QUEUE;
                break;
        case SRP_CMD_ORDERED_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
                break;
        case SRP_CMD_SIMPLE_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_SIMPLE;
                break;
        case SRP_CMD_ACA:
                scmnd->queue_type = SCST_CMD_QUEUE_ACA;
                break;
        default:
                scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
                break;
        }

        scst_cmd_set_tag(scmnd, srp_cmd->tag);
        scst_cmd_set_tgt_priv(scmnd, ioctx);
        scst_cmd_set_expected(scmnd, dir, ioctx->data_len);

        spin_lock_irqsave(&ch->spinlock, flags);
        list_add_tail(&ioctx->scmnd_list, &ch->active_scmnd_list);
        ch->active_scmnd_cnt++;
        spin_unlock_irqrestore(&ch->spinlock, flags);

        scst_cmd_init_done(scmnd, scst_estimate_context());

        return 0;

send_rsp:
        return -1;
}

/*
 * Process SRP_TSK_MGMT. See also table 19 in the T10 SRP r16a document.
 */
static int srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
                                struct srpt_ioctx *ioctx)
{
        struct srp_tsk_mgmt *srp_tsk = NULL;
        struct srpt_mgmt_ioctx *mgmt_ioctx;
        int ret;

        srp_tsk = ioctx->buf;

        printk(KERN_WARNING PFX
               "recv_tsk_mgmt= %d for task_tag= %lld"
               " using tag= %lld cm_id= %p sess= %p\n",
               srp_tsk->tsk_mgmt_func,
               (unsigned long long) srp_tsk->task_tag,
               (unsigned long long) srp_tsk->tag,
               ch->cm_id, ch->scst_sess);

        mgmt_ioctx = kmalloc(sizeof *mgmt_ioctx, GFP_ATOMIC);
        if (!mgmt_ioctx) {
                srpt_build_tskmgmt_rsp(ch, ioctx, SRP_TSK_MGMT_FAILED,
                                       srp_tsk->tag);
                goto send_rsp;
        }

        mgmt_ioctx->ioctx = ioctx;
        mgmt_ioctx->ch = ch;
        mgmt_ioctx->tag = srp_tsk->tag;

        switch (srp_tsk->tsk_mgmt_func) {
        case SRP_TSK_ABORT_TASK:
                ret = scst_rx_mgmt_fn_tag(ch->scst_sess,
                                          SCST_ABORT_TASK,
                                          srp_tsk->task_tag,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_ABORT_TASK_SET:
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_ABORT_TASK_SET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_CLEAR_TASK_SET:
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_CLEAR_TASK_SET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
#if 0
        case SRP_TSK_LUN_RESET:
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_LUN_RESET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
#endif
        case SRP_TSK_CLEAR_ACA:
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_CLEAR_ACA,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        default:
                srpt_build_tskmgmt_rsp(ch, ioctx,
                                       SRP_TSK_MGMT_FUNC_NOT_SUPP,
                                       srp_tsk->tag);
                goto send_rsp;
        }
        return 0;

send_rsp:
        return -1;
}

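/*
 * Process a newly received information unit (IU). IUs that arrive while the
 * channel is still connecting are queued on cmd_wait_list; on any other
 * non-live channel state the ioctx is reposted. Unsupported IU types are
 * answered with a response reporting an illegal request.
 */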
static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
                               struct srpt_ioctx *ioctx)
{
        u8 op;
        unsigned long flags;

        if (ch->state != RDMA_CHANNEL_LIVE) {
                if (ch->state == RDMA_CHANNEL_CONNECTING) {
                        spin_lock_irqsave(&ch->spinlock, flags);
                        list_add_tail(&ioctx->wait_list, &ch->cmd_wait_list);
                        spin_unlock_irqrestore(&ch->spinlock, flags);
                } else
                        srpt_reset_ioctx(ch, ioctx);

                return;
        }

        dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
                                MAX_MESSAGE_SIZE, DMA_FROM_DEVICE);

        ioctx->data_len = 0;
        ioctx->n_rbuf = 0;
        ioctx->rbufs = NULL;
        ioctx->n_rdma = 0;
        ioctx->n_rdma_ius = 0;
        ioctx->rdma_ius = NULL;
        ioctx->scmnd = NULL;

        op = *(u8 *) ioctx->buf;
        switch (op) {
        case SRP_CMD:
                if (srpt_handle_cmd(ch, ioctx) < 0)
                        goto send_rsp;
                break;

        case SRP_TSK_MGMT:
                if (srpt_handle_tsk_mgmt(ch, ioctx) < 0)
                        goto send_rsp;
                break;

        case SRP_I_LOGOUT:
        case SRP_AER_REQ:
        default:
                srpt_build_cmd_rsp(ch, ioctx, ILLEGAL_REQUEST, INVALID_CDB,
                                   ((struct srp_cmd *)ioctx->buf)->tag);

                goto send_rsp;
        }

        dma_sync_single_for_device(ch->sport->sdev->device->dma_device,
                                   ioctx->dma, MAX_MESSAGE_SIZE,
                                   DMA_FROM_DEVICE);

        return;

send_rsp:
        if (ch->state != RDMA_CHANNEL_LIVE ||
            srpt_post_send(ch, ioctx,
                           sizeof(struct srp_rsp) +
                           be32_to_cpu(((struct srp_rsp *)ioctx->buf)->
                                       sense_data_len)))
                srpt_reset_ioctx(ch, ioctx);
}

/*
 * Returns true if the ioctx list is non-empty or if the ib_srpt kernel thread
 * should stop.
 * @pre thread != 0
 */
static inline int srpt_test_ioctx_list(void)
{
        int res = (!list_empty(&srpt_thread.thread_ioctx_list) ||
                   unlikely(kthread_should_stop()));
        return res;
}

/*
 * Add 'ioctx' to the tail of the ioctx list and wake up the kernel thread.
 *
 * @pre thread != 0
 */
static inline void srpt_schedule_thread(struct srpt_ioctx *ioctx)
{
        unsigned long flags;

        spin_lock_irqsave(&srpt_thread.thread_lock, flags);
        list_add_tail(&ioctx->comp_list, &srpt_thread.thread_ioctx_list);
        spin_unlock_irqrestore(&srpt_thread.thread_lock, flags);
        wake_up(&ioctx_list_waitQ);
}

/*
 * InfiniBand CQ (completion queue) completion callback: invoked by the IB
 * core when a work completion has been queued on completion queue 'cq'.
 */
static void srpt_completion(struct ib_cq *cq, void *ctx)
{
        struct srpt_rdma_ch *ch = ctx;
        struct srpt_device *sdev = ch->sport->sdev;
        struct ib_wc wc;
        struct srpt_ioctx *ioctx;

        ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
        while (ib_poll_cq(ch->cq, 1, &wc) > 0) {
                if (wc.status) {
                        printk(KERN_ERR PFX "failed %s status= %d\n",
                               wc.wr_id & SRPT_OP_RECV ? "receive" : "send",
                               wc.status);
                        srpt_handle_err_comp(ch, &wc);
                        break;
                }

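                /*
                 * Receive completions carry the SRPT_OP_RECV flag in wr_id.
                 * In thread mode, completion processing is deferred to the
                 * ib_srpt kernel thread instead of running in IRQ context.
                 */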
                if (wc.wr_id & SRPT_OP_RECV) {
                        ioctx = sdev->ioctx_ring[wc.wr_id & ~SRPT_OP_RECV];
                        if (thread) {
                                ioctx->ch = ch;
                                ioctx->op = IB_WC_RECV;
                                srpt_schedule_thread(ioctx);
                        } else
                                srpt_handle_new_iu(ch, ioctx);
                        continue;
                } else
                        ioctx = sdev->ioctx_ring[wc.wr_id];

                if (thread) {
                        ioctx->ch = ch;
                        ioctx->op = wc.opcode;
                        srpt_schedule_thread(ioctx);
                } else {
                        switch (wc.opcode) {
                        case IB_WC_SEND:
                                srpt_handle_send_comp(ch, ioctx,
                                        scst_estimate_context());
                                break;
                        case IB_WC_RDMA_WRITE:
                        case IB_WC_RDMA_READ:
                                srpt_handle_rdma_comp(ch, ioctx);
                                break;
                        default:
                                break;
                        }
                }
        }
}

/*
 * Create a completion queue and a queue pair (QP) for the specified RDMA
 * channel.
 */
static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
{
        struct ib_qp_init_attr *qp_init;
        struct srpt_device *sdev = ch->sport->sdev;
        int cqe;
        int ret;

        qp_init = kzalloc(sizeof *qp_init, GFP_KERNEL);
        if (!qp_init)
                return -ENOMEM;

        /* Create a completion queue (CQ). */

        cqe = SRPT_RQ_SIZE + SRPT_SQ_SIZE - 1;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(RHEL_RELEASE_CODE)
        ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe);
#else
        ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe, 0);
#endif
        if (IS_ERR(ch->cq)) {
                ret = PTR_ERR(ch->cq);
                printk(KERN_ERR PFX "failed to create_cq cqe= %d ret= %d\n",
                        cqe, ret);
                goto out;
        }

        /* Request completion notification. */

        ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);

        /* Create a queue pair (QP). */

        qp_init->qp_context = (void *)ch;
        qp_init->event_handler = srpt_qp_event;
        qp_init->send_cq = ch->cq;
        qp_init->recv_cq = ch->cq;
        qp_init->srq = sdev->srq;
        qp_init->sq_sig_type = IB_SIGNAL_REQ_WR;
        qp_init->qp_type = IB_QPT_RC;
        qp_init->cap.max_send_wr = SRPT_SQ_SIZE;
        qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE;

        ch->qp = ib_create_qp(sdev->pd, qp_init);
        if (IS_ERR(ch->qp)) {
                ret = PTR_ERR(ch->qp);
                ib_destroy_cq(ch->cq);
                printk(KERN_ERR PFX "failed to create_qp ret= %d\n", ret);
                goto out;
        }

        printk(KERN_DEBUG PFX "%s: max_cqe= %d max_sge= %d cm_id= %p\n",
               __func__, ch->cq->cqe, qp_init->cap.max_send_sge,
               ch->cm_id);

        /* Modify the attributes and the state of queue pair ch->qp. */

        ret = srpt_init_ch_qp(ch, ch->qp);
        if (ret) {
                ib_destroy_qp(ch->qp);
                ib_destroy_cq(ch->cq);
                goto out;
        }

        atomic_set(&ch->req_lim_delta, SRPT_RQ_SIZE);
out:
        kfree(qp_init);
        return ret;
}

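/* Look up the RDMA channel, if any, that is associated with CM id 'cm_id'. */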
static struct srpt_rdma_ch *srpt_find_channel(struct ib_cm_id *cm_id)
{
        struct srpt_device *sdev = cm_id->context;
        struct srpt_rdma_ch *ch, *tmp_ch;

        spin_lock_irq(&sdev->spinlock);
        list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
                if (ch->cm_id == cm_id) {
                        spin_unlock_irq(&sdev->spinlock);
                        return ch;
                }
        }

        spin_unlock_irq(&sdev->spinlock);

        return NULL;
}

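/*
 * Release channel 'ch': destroy its IB resources, unregister its SCST
 * session and free it. The return value presumably feeds back into the IB CM
 * handler: when destroy_cmid is zero, returning one asks the CM to destroy
 * the cm_id itself.
 */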
static int srpt_release_channel(struct srpt_rdma_ch *ch, int destroy_cmid)
{
        spin_lock_irq(&ch->sport->sdev->spinlock);
        list_del(&ch->list);
        spin_unlock_irq(&ch->sport->sdev->spinlock);

        if (ch->cm_id && destroy_cmid) {
                printk(KERN_WARNING PFX
                       "%s: destroy cm_id= %p\n", __func__, ch->cm_id);
                ib_destroy_cm_id(ch->cm_id);
                ch->cm_id = NULL;
        }

        ib_destroy_qp(ch->qp);
        ib_destroy_cq(ch->cq);

        if (ch->scst_sess) {
                struct srpt_ioctx *ioctx, *ioctx_tmp;

                printk(KERN_WARNING PFX
                       "%s: release sess= %p sess_name= %s active_cmd= %d\n",
                       __func__, ch->scst_sess, ch->sess_name,
                       ch->active_scmnd_cnt);

                list_for_each_entry_safe(ioctx, ioctx_tmp,
                                         &ch->active_scmnd_list, scmnd_list) {
                        list_del(&ioctx->scmnd_list);
                        ch->active_scmnd_cnt--;
                }

                scst_unregister_session(ch->scst_sess, 0, NULL);
                ch->scst_sess = NULL;
        }

        kfree(ch);

        return destroy_cmid ? 0 : 1;
}

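/*
 * Move channel 'ch' into the disconnecting state and send either a CM DREQ
 * (dreq != 0) or a CM DREP message.
 */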
static int srpt_disconnect_channel(struct srpt_rdma_ch *ch, int dreq)
{
        spin_lock_irq(&ch->spinlock);
        ch->state = RDMA_CHANNEL_DISCONNECTING;
        spin_unlock_irq(&ch->spinlock);

        if (dreq)
                ib_send_cm_dreq(ch->cm_id, NULL, 0);
        else
                ib_send_cm_drep(ch->cm_id, NULL, 0);

        return 0;
}

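/*
 * Process an IB CM REQ, i.e. an SRP login request: validate the request,
 * set up a new RDMA channel and SCST session, and answer with either a CM
 * REP (SRP_LOGIN_RSP) or a CM REJ (SRP_LOGIN_REJ).
 */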
static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
                            struct ib_cm_req_event_param *param,
                            void *private_data)
{
        struct srpt_device *sdev = cm_id->context;
        struct srp_login_req *req;
        struct srp_login_rsp *rsp;
        struct srp_login_rej *rej;
        struct ib_cm_rep_param *rep_param;
        struct srpt_rdma_ch *ch, *tmp_ch;
        u32 it_iu_len;
        int ret = 0;

        if (!sdev || !private_data)
                return -EINVAL;

        rsp = kzalloc(sizeof *rsp, GFP_KERNEL);
        rej = kzalloc(sizeof *rej, GFP_KERNEL);
        rep_param = kzalloc(sizeof *rep_param, GFP_KERNEL);

        if (!rsp || !rej || !rep_param) {
                ret = -ENOMEM;
                goto out;
        }

        req = (struct srp_login_req *)private_data;

        it_iu_len = be32_to_cpu(req->req_it_iu_len);

        printk(KERN_DEBUG PFX
               "Host login i_port_id=0x%llx:0x%llx t_port_id=0x%llx:0x%llx"
               " it_iu_len=%d\n",
               (unsigned long long)
               be64_to_cpu(*(u64 *)&req->initiator_port_id[0]),
               (unsigned long long)
               be64_to_cpu(*(u64 *)&req->initiator_port_id[8]),
               (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[0]),
               (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[8]),
               it_iu_len);

        if (it_iu_len > MAX_MESSAGE_SIZE || it_iu_len < 64) {
                rej->reason =
                    cpu_to_be32(SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
                ret = -EINVAL;
                printk(KERN_WARNING PFX
                       "Reject invalid it_iu_len=%d\n", it_iu_len);
                goto reject;
        }

        if ((req->req_flags & 0x3) == SRP_MULTICHAN_SINGLE) {
                rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;

                spin_lock_irq(&sdev->spinlock);

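                /*
                 * A login with MULTICHANNEL ACTION set to "single" terminates
                 * any channel that already exists between the same initiator
                 * and target port.
                 */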
1425                 list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
1426                         if (!memcmp(ch->i_port_id, req->initiator_port_id, 16)
1427                             && !memcmp(ch->t_port_id, req->target_port_id, 16)
1428                             && param->port == ch->sport->port
1429                             && param->listen_id == ch->sport->sdev->cm_id
1430                             && ch->cm_id) {
1431                                 /* found an existing channel */
1432                                 printk(KERN_WARNING PFX
1433                                        "Found existing channel name= %s"
1434                                        " cm_id= %p state= %d\n",
1435                                        ch->sess_name, ch->cm_id, ch->state);
1436
1437                                 spin_unlock_irq(&sdev->spinlock);
1438
1439                                 rsp->rsp_flags =
1440                                     SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
1441
1442                                 if (ch->state == RDMA_CHANNEL_LIVE)
1443                                         srpt_disconnect_channel(ch, 1);
1444                                 else if (ch->state == RDMA_CHANNEL_CONNECTING) {
1445                                         ib_send_cm_rej(ch->cm_id,
1446                                                        IB_CM_REJ_NO_RESOURCES,
1447                                                        NULL, 0, NULL, 0);
1448                                         srpt_release_channel(ch, 1);
1449                                 }
1450
1451                                 spin_lock_irq(&sdev->spinlock);
1452                         }
1453                 }
1454
1455                 spin_unlock_irq(&sdev->spinlock);
1456
1457         } else
1458                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
1459
1460         if (((u64) (*(u64 *) req->target_port_id) !=
1461              cpu_to_be64(mellanox_ioc_guid)) ||
1462             ((u64) (*(u64 *) (req->target_port_id + 8)) !=
1463              cpu_to_be64(mellanox_ioc_guid))) {
1464                 rej->reason =
1465                     cpu_to_be32(SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
1466                 ret = -ENOMEM;
1467                 printk(KERN_WARNING PFX "Reject invalid target_port_id\n");
1468                 goto reject;
1469         }
1470
1471         ch = kzalloc(sizeof *ch, GFP_KERNEL);
1472         if (!ch) {
1473                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1474                 printk(KERN_WARNING PFX "Reject failed allocate rdma_ch\n");
1475                 ret = -ENOMEM;
1476                 goto reject;
1477         }
1478
1479         spin_lock_init(&ch->spinlock);
1480         memcpy(ch->i_port_id, req->initiator_port_id, 16);
1481         memcpy(ch->t_port_id, req->target_port_id, 16);
1482         ch->sport = &sdev->port[param->port - 1];
1483         ch->cm_id = cm_id;
1484         ch->state = RDMA_CHANNEL_CONNECTING;
1485         INIT_LIST_HEAD(&ch->cmd_wait_list);
1486         INIT_LIST_HEAD(&ch->active_scmnd_list);
1487
1488         ret = srpt_create_ch_ib(ch);
1489         if (ret) {
1490                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
		printk(KERN_WARNING PFX
		       "Rejecting login: failed to create channel resources\n");
1492                 goto free_ch;
1493         }
1494
1495         ret = srpt_ch_qp_rtr_rts(ch, ch->qp, IB_QPS_RTR);
1496         if (ret) {
1497                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
		printk(KERN_WARNING PFX
		       "Rejecting login: failed to bring QP into the RTR state, ret=%d\n",
		       ret);
1500                 goto destroy_ib;
1501         }
1502
1503         sprintf(ch->sess_name, "0x%016llx%016llx",
1504                 (unsigned long long)be64_to_cpu(*(u64 *)ch->i_port_id),
1505                 (unsigned long long)be64_to_cpu(*(u64 *)(ch->i_port_id + 8)));
1506
1507         BUG_ON(!sdev->scst_tgt);
1508         ch->scst_sess = scst_register_session(sdev->scst_tgt, 0, ch->sess_name,
1509                                   NULL, NULL);
	if (!ch->scst_sess) {
		rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
		printk(KERN_WARNING PFX "Failed to create SCST session\n");
		ret = -ENOMEM;
		goto destroy_ib;
	}
1515
1516         spin_lock_irq(&sdev->spinlock);
1517         list_add_tail(&ch->list, &sdev->rch_list);
1518         spin_unlock_irq(&sdev->spinlock);
1519
1520         printk(KERN_DEBUG PFX "Establish connection sess=%p name=%s cm_id=%p\n",
1521                ch->scst_sess, ch->sess_name, ch->cm_id);
1522
1523         scst_sess_set_tgt_priv(ch->scst_sess, ch);
1524
1525         /* create srp_login_response */
1526         rsp->opcode = SRP_LOGIN_RSP;
1527         rsp->tag = req->tag;
1528         rsp->max_it_iu_len = req->req_it_iu_len;
1529         rsp->max_ti_iu_len = req->req_it_iu_len;
1530         rsp->buf_fmt =
1531             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
1532         rsp->req_lim_delta = cpu_to_be32(SRPT_RQ_SIZE);
1533         atomic_set(&ch->req_lim_delta, 0);
1534
1535         /* create cm reply */
1536         rep_param->qp_num = ch->qp->qp_num;
1537         rep_param->private_data = (void *)rsp;
1538         rep_param->private_data_len = sizeof *rsp;
1539         rep_param->rnr_retry_count = 7;
1540         rep_param->flow_control = 1;
1541         rep_param->failover_accepted = 0;
1542         rep_param->srq = 1;
1543         rep_param->responder_resources = 4;
1544         rep_param->initiator_depth = 4;
1545
1546         ret = ib_send_cm_rep(cm_id, rep_param);
1547         if (ret)
1548                 srpt_release_channel(ch, 0);
1549
1550         goto out;
1551
1552 destroy_ib:
1553         ib_destroy_qp(ch->qp);
1554         ib_destroy_cq(ch->cq);
1555
1556 free_ch:
1557         kfree(ch);
1558
1559 reject:
1560         rej->opcode = SRP_LOGIN_REJ;
1561         rej->tag = req->tag;
1562         rej->buf_fmt =
1563             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
1564
1565         ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
1566                              (void *)rej, sizeof *rej);
1567
1568 out:
1569         kfree(rep_param);
1570         kfree(rsp);
1571         kfree(rej);
1572
1573         return ret;
1574 }
1575
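/*
 * Look up the channel that belongs to 'cm_id' and release it. Returns
 * -EINVAL if no matching channel was found.
 */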
1576 static int srpt_find_and_release_channel(struct ib_cm_id *cm_id)
1577 {
1578         struct srpt_rdma_ch *ch;
1579
1580         ch = srpt_find_channel(cm_id);
1581         if (!ch)
1582                 return -EINVAL;
1583
1584         return srpt_release_channel(ch, 0);
1585 }
1586
1587 static int srpt_cm_rej_recv(struct ib_cm_id *cm_id)
1588 {
1589         printk(KERN_DEBUG PFX "%s: cm_id=%p\n", __func__, cm_id);
1590         return srpt_find_and_release_channel(cm_id);
1591 }
1592
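/*
 * Process an RTU (ready to use) or user-established CM event: bring the
 * queue pair into the RTS state, mark the channel live and process any
 * information units that were queued while the channel was connecting.
 */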
1593 static int srpt_cm_rtu_recv(struct ib_cm_id *cm_id)
1594 {
1595         struct srpt_rdma_ch *ch;
1596         int ret;
1597
1598         ch = srpt_find_channel(cm_id);
1599         if (!ch)
1600                 return -EINVAL;
1601
1602         if (ch->state == RDMA_CHANNEL_CONNECTING) {
1603                 struct srpt_ioctx *ioctx, *ioctx_tmp;
1604
1605                 spin_lock_irq(&ch->spinlock);
1606                 ch->state = RDMA_CHANNEL_LIVE;
1607                 spin_unlock_irq(&ch->spinlock);
1608                 ret = srpt_ch_qp_rtr_rts(ch, ch->qp, IB_QPS_RTS);
1609
1610                 list_for_each_entry_safe(ioctx, ioctx_tmp, &ch->cmd_wait_list,
1611                                          wait_list) {
1612                         list_del(&ioctx->wait_list);
1613                         srpt_handle_new_iu(ch, ioctx);
1614                 }
1615         } else if (ch->state == RDMA_CHANNEL_DISCONNECTING)
1616                 ret = -EAGAIN;
1617         else
1618                 ret = 0;
1619
1620         if (ret) {
1621                 printk(KERN_ERR PFX "cm_id=%p sess_name=%s state=%d\n",
1622                        cm_id, ch->sess_name, ch->state);
1623                 srpt_disconnect_channel(ch, 1);
1624         }
1625
1626         return ret;
1627 }
1628
1629 static int srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
1630 {
1631         printk(KERN_DEBUG PFX "%s: cm_id=%p\n", __func__, cm_id);
1632         return srpt_find_and_release_channel(cm_id);
1633 }
1634
1635 static int srpt_cm_rep_error(struct ib_cm_id *cm_id)
1636 {
1637         printk(KERN_DEBUG PFX "%s: cm_id=%p\n", __func__, cm_id);
1638         return srpt_find_and_release_channel(cm_id);
1639 }
1640
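/*
 * Process a CM DREQ (disconnect request) event: start disconnecting
 * channels that are live or still connecting; a channel that is already
 * disconnecting is left alone.
 */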
1641 static int srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
1642 {
1643         struct srpt_rdma_ch *ch;
1644         int ret = 0;
1645
1646         ch = srpt_find_channel(cm_id);
1647
1648         if (!ch)
1649                 return -EINVAL;
1650
1651         printk(KERN_DEBUG PFX "%s: cm_id= %p ch->state= %d\n",
1652                  __func__, cm_id, ch->state);
1653
1654         switch (ch->state) {
1655         case RDMA_CHANNEL_LIVE:
1656         case RDMA_CHANNEL_CONNECTING:
1657                 ret = srpt_disconnect_channel(ch, 0);
1658                 break;
1659         case RDMA_CHANNEL_DISCONNECTING:
1660         default:
1661                 break;
1662         }
1663
1664         return ret;
1665 }
1666
1667 static int srpt_cm_drep_recv(struct ib_cm_id *cm_id)
1668 {
1669         printk(KERN_DEBUG PFX "%s: cm_id=%p\n", __func__, cm_id);
1670         return srpt_find_and_release_channel(cm_id);
1671 }
1672
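/*
 * InfiniBand connection manager callback. Dispatches both login events
 * (REQ / REJ / RTU / REP error) and logout events (DREQ / DREP /
 * timewait exit) to the handlers above.
 */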
1673 static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
1674 {
1675         int ret = 0;
1676
1677         switch (event->event) {
1678         case IB_CM_REQ_RECEIVED:
1679                 ret = srpt_cm_req_recv(cm_id, &event->param.req_rcvd,
1680                                        event->private_data);
1681                 break;
1682         case IB_CM_REJ_RECEIVED:
1683                 ret = srpt_cm_rej_recv(cm_id);
1684                 break;
1685         case IB_CM_RTU_RECEIVED:
1686         case IB_CM_USER_ESTABLISHED:
1687                 ret = srpt_cm_rtu_recv(cm_id);
1688                 break;
1689         case IB_CM_DREQ_RECEIVED:
1690                 ret = srpt_cm_dreq_recv(cm_id);
1691                 break;
1692         case IB_CM_DREP_RECEIVED:
1693                 ret = srpt_cm_drep_recv(cm_id);
1694                 break;
1695         case IB_CM_TIMEWAIT_EXIT:
1696                 ret = srpt_cm_timewait_exit(cm_id);
1697                 break;
1698         case IB_CM_REP_ERROR:
1699                 ret = srpt_cm_rep_error(cm_id);
1700                 break;
1701         default:
1702                 break;
1703         }
1704
1705         return ret;
1706 }
1707
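/*
 * Map the scatter/gather list of 'scmnd' onto the ib_sge arrays of the
 * RDMA information units (rdma_ius) of 'ioctx'. This is done in two
 * passes: the first pass counts how many ib_sge entries and RDMA work
 * requests are needed, the second pass fills in addresses and lengths.
 */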
1708 static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1709                                  struct srpt_ioctx *ioctx,
1710                                  struct scst_cmd *scmnd)
1711 {
1712         struct scatterlist *scat;
1713         scst_data_direction dir;
1714         struct rdma_iu *riu;
1715         struct srp_direct_buf *db;
1716         dma_addr_t dma_addr;
1717         struct ib_sge *sge;
1718         u64 raddr;
1719         u32 rsize;
1720         u32 tsize;
1721         u32 dma_len;
1722         int count, nrdma;
1723         int i, j, k;
1724
1725         scat = scst_cmd_get_sg(scmnd);
1726         dir = scst_cmd_get_data_direction(scmnd);
1727         count = dma_map_sg(ch->sport->sdev->device->dma_device, scat,
1728                            scst_cmd_get_sg_cnt(scmnd),
1729                            scst_to_tgt_dma_dir(dir));
1730         if (unlikely(!count))
1731                 return -EBUSY;
1732
1733         if (ioctx->rdma_ius && ioctx->n_rdma_ius)
1734                 nrdma = ioctx->n_rdma_ius;
1735         else {
1736                 nrdma = count / SRPT_DEF_SG_PER_WQE + ioctx->n_rbuf;
1737
1738                 ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu,
1739                                           scst_cmd_atomic(scmnd)
1740                                           ? GFP_ATOMIC : GFP_KERNEL);
1741                 if (!ioctx->rdma_ius) {
1742                         dma_unmap_sg(ch->sport->sdev->device->dma_device,
1743                                      scat, scst_cmd_get_sg_cnt(scmnd),
1744                                      scst_to_tgt_dma_dir(dir));
1745                         return -ENOMEM;
1746                 }
1747
1748                 ioctx->n_rdma_ius = nrdma;
1749         }
1750
1751         db = ioctx->rbufs;
1752         tsize = (dir == SCST_DATA_READ) ?
1753                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
1754         dma_len = sg_dma_len(&scat[0]);
1755         riu = ioctx->rdma_ius;
1756
	/*
	 * First pass: for each remote buffer descriptor, calculate the
	 * number of ib_sge entries needed to cover it. If a descriptor
	 * needs at most SRPT_DEF_SG_PER_WQE ib_sge entries, a single
	 * rdma_iu (one RDMA work request) is enough; otherwise extra
	 * rdma_iu entries are allocated and the remaining ib_sge entries
	 * are carried by additional RDMA work requests.
	 */
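	/*
	 * Illustrative example (hypothetical numbers): if
	 * SRPT_DEF_SG_PER_WQE were 3 and a single remote buffer were
	 * covered by five DMA segments, two rdma_ius would result: the
	 * first carrying three ib_sge entries and the second the
	 * remaining two.
	 */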
1765         for (i = 0, j = 0;
1766              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
1767                 rsize = be32_to_cpu(db->len);
1768                 raddr = be64_to_cpu(db->va);
1769                 riu->raddr = raddr;
1770                 riu->rkey = be32_to_cpu(db->key);
1771                 riu->sge_cnt = 0;
1772
		/* Calculate how many ib_sge entries this remote buffer needs. */
1774                 while (rsize > 0 && tsize > 0) {
1775
1776                         if (rsize >= dma_len) {
1777                                 tsize -= dma_len;
1778                                 rsize -= dma_len;
1779                                 raddr += dma_len;
1780
1781                                 if (tsize > 0) {
1782                                         ++j;
1783                                         if (j < count)
1784                                                 dma_len = sg_dma_len(&scat[j]);
1785                                 }
1786                         } else {
1787                                 tsize -= rsize;
1788                                 dma_len -= rsize;
1789                                 rsize = 0;
1790                         }
1791
1792                         ++riu->sge_cnt;
1793
1794                         if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) {
1795                                 riu->sge =
1796                                     kmalloc(riu->sge_cnt * sizeof *riu->sge,
1797                                             scst_cmd_atomic(scmnd)
1798                                             ? GFP_ATOMIC : GFP_KERNEL);
1799                                 if (!riu->sge)
1800                                         goto free_mem;
1801
1802                                 ++ioctx->n_rdma;
1803                                 ++riu;
1804                                 riu->sge_cnt = 0;
1805                                 riu->raddr = raddr;
1806                                 riu->rkey = be32_to_cpu(db->key);
1807                         }
1808                 }
1809
1810                 riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge,
1811                                    scst_cmd_atomic(scmnd)
1812                                    ? GFP_ATOMIC : GFP_KERNEL);
1813
1814                 if (!riu->sge)
1815                         goto free_mem;
1816
1817                 ++ioctx->n_rdma;
1818         }
1819
1820         db = ioctx->rbufs;
1821         scat = scst_cmd_get_sg(scmnd);
1822         tsize = (dir == SCST_DATA_READ) ?
1823                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
1824         riu = ioctx->rdma_ius;
1825         dma_len = sg_dma_len(&scat[0]);
1826         dma_addr = sg_dma_address(&scat[0]);
1827
	/*
	 * Second pass: fill in the ib_sge arrays with the DMA-mapped
	 * scatter/gather addresses and lengths.
	 */
1829         for (i = 0, j = 0;
1830              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
1831                 rsize = be32_to_cpu(db->len);
1832                 sge = riu->sge;
1833                 k = 0;
1834
1835                 while (rsize > 0 && tsize > 0) {
1836                         sge->addr = dma_addr;
1837                         sge->lkey = ch->sport->sdev->mr->lkey;
1838
1839                         if (rsize >= dma_len) {
1840                                 sge->length =
1841                                         (tsize < dma_len) ? tsize : dma_len;
1842                                 tsize -= dma_len;
1843                                 rsize -= dma_len;
1844
1845                                 if (tsize > 0) {
1846                                         ++j;
1847                                         if (j < count) {
1848                                                 dma_len = sg_dma_len(&scat[j]);
1849                                                 dma_addr =
1850                                                     sg_dma_address(&scat[j]);
1851                                         }
1852                                 }
1853                         } else {
1854                                 sge->length = (tsize < rsize) ? tsize : rsize;
1855                                 tsize -= rsize;
1856                                 dma_len -= rsize;
1857                                 dma_addr += rsize;
1858                                 rsize = 0;
1859                         }
1860
1861                         ++k;
1862                         if (k == riu->sge_cnt && rsize > 0) {
1863                                 ++riu;
1864                                 sge = riu->sge;
1865                                 k = 0;
1866                         } else if (rsize > 0)
1867                                 ++sge;
1868                 }
1869         }
1870
1871         return 0;
1872
free_mem:
	/*
	 * Free all ib_sge arrays allocated so far; pre-decrement so that
	 * index zero is freed too and no out-of-range element is touched.
	 */
	while (ioctx->n_rdma)
		kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge);
1876
1877         kfree(ioctx->rdma_ius);
1878
1879         dma_unmap_sg(ch->sport->sdev->device->dma_device,
1880                      scat, scst_cmd_get_sg_cnt(scmnd),
1881                      scst_to_tgt_dma_dir(dir));
1882
1883         return -ENOMEM;
1884 }
1885
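/*
 * Post one RDMA work request per rdma_iu built by srpt_map_sg_to_ib_sge():
 * RDMA writes when sending data to the initiator (SCST_DATA_READ) and
 * RDMA reads when fetching data from the initiator (SCST_DATA_WRITE).
 */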
1886 static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
1887                               scst_data_direction dir)
1888 {
1889         struct ib_send_wr wr;
1890         struct ib_send_wr *bad_wr;
1891         struct rdma_iu *riu;
1892         int i;
1893         int ret = 0;
1894
1895         riu = ioctx->rdma_ius;
1896         memset(&wr, 0, sizeof wr);
1897
1898         for (i = 0; i < ioctx->n_rdma; ++i, ++riu) {
1899                 wr.opcode = (dir == SCST_DATA_READ) ?
1900                     IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
1901                 wr.next = NULL;
1902                 wr.wr_id = ioctx->index;
1903                 wr.wr.rdma.remote_addr = riu->raddr;
1904                 wr.wr.rdma.rkey = riu->rkey;
1905                 wr.num_sge = riu->sge_cnt;
1906                 wr.sg_list = riu->sge;
1907
		/*
		 * Request a completion event only for the last work request,
		 * and only when fetching data from the initiator (RDMA read).
		 */
1909                 if (i == (ioctx->n_rdma - 1) && dir == SCST_DATA_WRITE)
1910                         wr.send_flags = IB_SEND_SIGNALED;
1911
1912                 ret = ib_post_send(ch->qp, &wr, &bad_wr);
1913                 if (ret)
1914                         break;
1915         }
1916
1917         return ret;
1918 }
1919
/*
 * Start an RDMA data transfer for 'scmnd'. Must not block.
 */
1923 static int srpt_xfer_data(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
1924                           struct scst_cmd *scmnd)
1925 {
1926         int ret;
1927
1928         ret = srpt_map_sg_to_ib_sge(ch, ioctx, scmnd);
1929         if (ret) {
1930                 printk(KERN_ERR PFX "%s[%d] ret=%d\n", __func__, __LINE__, ret);
1931                 ret = SCST_TGT_RES_QUEUE_FULL;
1932                 goto out;
1933         }
1934
1935         ret = srpt_perform_rdmas(ch, ioctx, scst_cmd_get_data_direction(scmnd));
1936         if (ret) {
1937                 printk(KERN_ERR PFX "%s[%d] ret=%d\n", __func__, __LINE__, ret);
1938                 if (ret == -EAGAIN || ret == -ENOMEM)
1939                         ret = SCST_TGT_RES_QUEUE_FULL;
1940                 else
1941                         ret = SCST_TGT_RES_FATAL_ERROR;
1942                 goto out;
1943         }
1944
1945         ret = SCST_TGT_RES_SUCCESS;
1946
1947 out:
1948         return ret;
1949 }
1950
1951 /*
1952  * Called by the SCST core to inform ib_srpt that data reception should start.
1953  * Must not block.
1954  */
1955 static int srpt_rdy_to_xfer(struct scst_cmd *scmnd)
1956 {
1957         struct srpt_rdma_ch *ch;
1958         struct srpt_ioctx *ioctx;
1959
1960         ioctx = scst_cmd_get_tgt_priv(scmnd);
1961         BUG_ON(!ioctx);
1962
1963         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
1964         BUG_ON(!ch);
1965
1966         if (ch->state == RDMA_CHANNEL_DISCONNECTING)
1967                 return SCST_TGT_RES_FATAL_ERROR;
1968         else if (ch->state == RDMA_CHANNEL_CONNECTING)
1969                 return SCST_TGT_RES_QUEUE_FULL;
1970
1971         return srpt_xfer_data(ch, ioctx, scmnd);
1972 }
1973
1974 /*
1975  * Called by the SCST core. Transmits the response buffer and status held in
1976  * 'scmnd'. Must not block.
1977  */
1978 static int srpt_xmit_response(struct scst_cmd *scmnd)
1979 {
1980         struct srpt_rdma_ch *ch;
1981         struct srpt_ioctx *ioctx;
1982         struct srp_rsp *srp_rsp;
1983         u64 tag;
1984         int ret = SCST_TGT_RES_SUCCESS;
1985         int dir;
1986         int status;
1987
1988         ioctx = scst_cmd_get_tgt_priv(scmnd);
1989         BUG_ON(!ioctx);
1990
1991         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
1992         BUG_ON(!ch);
1993
1994         tag = scst_cmd_get_tag(scmnd);
1995
1996         if (ch->state != RDMA_CHANNEL_LIVE) {
1997                 printk(KERN_ERR PFX
1998                        "%s: tag= %lld channel in bad state %d\n",
1999                        __func__, (unsigned long long)tag, ch->state);
2000
2001                 if (ch->state == RDMA_CHANNEL_DISCONNECTING)
2002                         ret = SCST_TGT_RES_FATAL_ERROR;
2003                 else if (ch->state == RDMA_CHANNEL_CONNECTING)
2004                         ret = SCST_TGT_RES_QUEUE_FULL;
2005
2006                 if (unlikely(scst_cmd_aborted(scmnd)))
2007                         goto out_aborted;
2008
2009                 goto out;
2010         }
2011
2012         dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
2013                                 MAX_MESSAGE_SIZE, DMA_TO_DEVICE);
2014
2015         srp_rsp = ioctx->buf;
2016
2017         if (unlikely(scst_cmd_aborted(scmnd))) {
2018                 printk(KERN_ERR PFX
		       "%s: tag= %lld has already been aborted\n",
2020                        __func__, (unsigned long long)tag);
2021                 goto out_aborted;
2022         }
2023
2024         dir = scst_cmd_get_data_direction(scmnd);
2025         status = scst_cmd_get_status(scmnd) & 0xff;
2026
2027         srpt_build_cmd_rsp(ch, ioctx, NO_SENSE, NO_ADD_SENSE, tag);
2028
2029         if (SCST_SENSE_VALID(scst_cmd_get_sense_buffer(scmnd))) {
2030                 srp_rsp->sense_data_len = scst_cmd_get_sense_buffer_len(scmnd);
2031                 if (srp_rsp->sense_data_len >
2032                     (MAX_MESSAGE_SIZE - sizeof *srp_rsp))
2033                         srp_rsp->sense_data_len =
2034                             MAX_MESSAGE_SIZE - sizeof *srp_rsp;
2035
2036                 memcpy((u8 *) (srp_rsp + 1), scst_cmd_get_sense_buffer(scmnd),
2037                        srp_rsp->sense_data_len);
2038
2039                 srp_rsp->sense_data_len = cpu_to_be32(srp_rsp->sense_data_len);
2040                 srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
2041
2042                 if (!status)
2043                         status = SAM_STAT_CHECK_CONDITION;
2044         }
2045
2046         srp_rsp->status = status;
2047
2048         /* transfer read data if any */
2049         if (dir == SCST_DATA_READ && scst_cmd_get_resp_data_len(scmnd)) {
2050                 ret = srpt_xfer_data(ch, ioctx, scmnd);
2051                 if (ret != SCST_TGT_RES_SUCCESS) {
2052                         printk(KERN_ERR PFX
2053                                "%s: tag= %lld xfer_data failed\n",
2054                                __func__, (unsigned long long)tag);
2055                         goto out;
2056                 }
2057         }
2058
2059         if (srpt_post_send(ch, ioctx,
2060                            sizeof *srp_rsp +
2061                            be32_to_cpu(srp_rsp->sense_data_len))) {
2062                 printk(KERN_ERR PFX "%s: ch->state= %d tag= %lld\n",
2063                        __func__, ch->state,
2064                        (unsigned long long)tag);
2065                 ret = SCST_TGT_RES_FATAL_ERROR;
2066         }
2067
2068 out:
2069         return ret;
2070
2071 out_aborted:
2072         ret = SCST_TGT_RES_SUCCESS;
2073         scst_set_delivery_status(scmnd, SCST_CMD_DELIVERY_ABORTED);
2074         scst_tgt_cmd_done(scmnd, SCST_CONTEXT_SAME);
2075         goto out;
2076 }
2077
2078 /*
2079  * Called by the SCST core to inform ib_srpt that a received task management
2080  * function has been completed. Must not block.
2081  */
2082 static void srpt_tsk_mgmt_done(struct scst_mgmt_cmd *mcmnd)
2083 {
2084         struct srpt_rdma_ch *ch;
2085         struct srpt_mgmt_ioctx *mgmt_ioctx;
2086         struct srpt_ioctx *ioctx;
2087
2088         mgmt_ioctx = scst_mgmt_cmd_get_tgt_priv(mcmnd);
2089         BUG_ON(!mgmt_ioctx);
2090
2091         ch = mgmt_ioctx->ch;
2092         BUG_ON(!ch);
2093
2094         ioctx = mgmt_ioctx->ioctx;
2095         BUG_ON(!ioctx);
2096
2097         printk(KERN_WARNING PFX
2098                "%s: tsk_mgmt_done for tag= %lld status=%d\n",
2099                __func__, (unsigned long long)mgmt_ioctx->tag,
2100                scst_mgmt_cmd_get_status(mcmnd));
2101
2102         srpt_build_tskmgmt_rsp(ch, ioctx,
2103                                (scst_mgmt_cmd_get_status(mcmnd) ==
2104                                 SCST_MGMT_STATUS_SUCCESS) ?
2105                                SRP_TSK_MGMT_SUCCESS : SRP_TSK_MGMT_FAILED,
2106                                mgmt_ioctx->tag);
2107         srpt_post_send(ch, ioctx, sizeof(struct srp_rsp) + 4);
2108
2109         scst_mgmt_cmd_set_tgt_priv(mcmnd, NULL);
2110
2111         kfree(mgmt_ioctx);
2112 }
2113
2114 /*
2115  * Called by the SCST core to inform ib_srpt that the command 'scmnd' is about
2116  * to be freed. May be called in IRQ context.
2117  */
2118 static void srpt_on_free_cmd(struct scst_cmd *scmnd)
2119 {
2120         struct srpt_rdma_ch *ch;
2121         struct srpt_ioctx *ioctx;
2122
2123         ioctx = scst_cmd_get_tgt_priv(scmnd);
2124         BUG_ON(!ioctx);
2125
2126         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2127         BUG_ON(!ch);
2128
2129         spin_lock_irq(&ch->spinlock);
2130         list_del(&ioctx->scmnd_list);
2131         ch->active_scmnd_cnt--;
2132         spin_unlock_irq(&ch->spinlock);
2133
2134         srpt_reset_ioctx(ch, ioctx);
2135         scst_cmd_set_tgt_priv(scmnd, NULL);
2136 }
2137
2138 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2139 static void srpt_refresh_port_work(void *ctx)
2140 #else
2141 static void srpt_refresh_port_work(struct work_struct *work)
2142 #endif
2143 {
2144 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2145         struct srpt_port *sport = (struct srpt_port *)ctx;
2146 #else
2147         struct srpt_port *sport = container_of(work, struct srpt_port, work);
2148 #endif
2149
2150         srpt_refresh_port(sport);
2151 }
2152
2153 /*
2154  * Called by the SCST core to detect target adapters. Returns the number of
2155  * detected target adapters.
2156  */
2157 static int srpt_detect(struct scst_tgt_template *tp)
2158 {
2159         struct srpt_device *sdev;
2160         int count = 0;
2161
2162         list_for_each_entry(sdev, &srpt_devices, list)
2163                 ++count;
2164         return count;
2165 }
2166
2167 /*
2168  * Callback function called by the SCST core from scst_unregister() to free up
2169  * the resources associated with device scst_tgt.
2170  */
2171 static int srpt_release(struct scst_tgt *scst_tgt)
2172 {
2173         struct srpt_device *sdev = scst_tgt_get_tgt_priv(scst_tgt);
2174         struct srpt_rdma_ch *ch, *tmp_ch;
2175
2176         BUG_ON(!scst_tgt);
2177 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
2178         WARN_ON(!sdev);
2179         if (!sdev)
2180                 return -ENODEV;
2181 #else
2182         if (WARN_ON(!sdev))
2183                 return -ENODEV;
2184 #endif
2185
	list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list)
		srpt_release_channel(ch, 1);
2188
2189         srpt_unregister_mad_agent(sdev);
2190
2191         scst_tgt_set_tgt_priv(scst_tgt, NULL);
2192
2193         return 0;
2194 }
2195
2196 /*
2197  * Entry point for ib_srpt's kernel thread. This kernel thread is only created
2198  * when the module parameter 'thread' is not zero (the default is zero).
2199  * This thread processes the ioctx list srpt_thread.thread_ioctx_list.
2200  *
2201  * @pre thread != 0
2202  */
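/*
 * Usage sketch (an assumed invocation, not taken from this file):
 * thread-context processing is enabled by loading this module with the
 * 'thread' parameter set, e.g. "modprobe ib_srpt thread=1".
 */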
2203 static int srpt_ioctx_thread(void *arg)
2204 {
2205         struct srpt_ioctx *ioctx;
2206
2207         /* Hibernation / freezing of the SRPT kernel thread is not supported. */
2208         current->flags |= PF_NOFREEZE;
2209
2210         spin_lock_irq(&srpt_thread.thread_lock);
2211         while (!kthread_should_stop()) {
2212                 wait_queue_t wait;
2213                 init_waitqueue_entry(&wait, current);
2214
2215                 if (!srpt_test_ioctx_list()) {
2216                         add_wait_queue_exclusive(&ioctx_list_waitQ, &wait);
2217
2218                         for (;;) {
2219                                 set_current_state(TASK_INTERRUPTIBLE);
2220                                 if (srpt_test_ioctx_list())
2221                                         break;
2222                                 spin_unlock_irq(&srpt_thread.thread_lock);
2223                                 schedule();
2224                                 spin_lock_irq(&srpt_thread.thread_lock);
2225                         }
2226                         set_current_state(TASK_RUNNING);
2227                         remove_wait_queue(&ioctx_list_waitQ, &wait);
2228                 }
2229
2230                 while (!list_empty(&srpt_thread.thread_ioctx_list)) {
2231                         ioctx = list_entry(srpt_thread.thread_ioctx_list.next,
2232                                            struct srpt_ioctx, comp_list);
2233
2234                         list_del(&ioctx->comp_list);
2235
2236                         spin_unlock_irq(&srpt_thread.thread_lock);
2237                         switch (ioctx->op) {
2238                         case IB_WC_SEND:
2239                                 srpt_handle_send_comp(ioctx->ch, ioctx,
2240                                         SCST_CONTEXT_DIRECT);
2241                                 break;
2242                         case IB_WC_RDMA_WRITE:
2243                         case IB_WC_RDMA_READ:
2244                                 srpt_handle_rdma_comp(ioctx->ch, ioctx);
2245                                 break;
2246                         case IB_WC_RECV:
2247                                 srpt_handle_new_iu(ioctx->ch, ioctx);
2248                                 break;
2249                         default:
2250                                 break;
2251                         }
2252                         spin_lock_irq(&srpt_thread.thread_lock);
2253                 }
2254         }
2255         spin_unlock_irq(&srpt_thread.thread_lock);
2256
2257         return 0;
2258 }
2259
2260 /* SCST target template for the SRP target implementation. */
2261 static struct scst_tgt_template srpt_template = {
2262         .name = DRV_NAME,
2263         .sg_tablesize = SRPT_DEF_SG_TABLESIZE,
2264         .xmit_response_atomic = 1,
2265         .rdy_to_xfer_atomic = 1,
2266         .no_proc_entry = 1,
2267         .detect = srpt_detect,
2268         .release = srpt_release,
2269         .xmit_response = srpt_xmit_response,
2270         .rdy_to_xfer = srpt_rdy_to_xfer,
2271         .on_free_cmd = srpt_on_free_cmd,
2272         .task_mgmt_fn_done = srpt_tsk_mgmt_done
2273 };
2274
2275 /*
2276  * The callback function srpt_release_class_dev() is called whenever a
2277  * device is removed from the /sys/class/infiniband_srpt device class.
2278  */
2279 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2280 static void srpt_release_class_dev(struct class_device *class_dev)
2281 #else
2282 static void srpt_release_class_dev(struct device *dev)
2283 #endif
2284 {
2285 }
2286
2287 static struct class srpt_class = {
2288         .name = "infiniband_srpt",
2289 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2290         .release = srpt_release_class_dev
2291 #else
2292         .dev_release = srpt_release_class_dev
2293 #endif
2294 };
2295
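/*
 * sysfs attribute that emits, for each port, a target login string in the
 * format the SRP initiator expects. A sketch of how such a string might be
 * consumed on the initiator side (device and port names are assumed for
 * illustration):
 *
 *   cat /sys/class/infiniband_srpt/srpt-mthca0/login_info \
 *       > /sys/class/infiniband_srp/srp-mthca0-1/add_target
 */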
2296 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2297 static ssize_t show_login_info(struct class_device *class_dev, char *buf)
2298 #else
2299 static ssize_t show_login_info(struct device *dev,
2300                                struct device_attribute *attr, char *buf)
2301 #endif
2302 {
2303         struct srpt_device *sdev =
2304 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2305                 container_of(class_dev, struct srpt_device, class_dev);
2306 #else
2307                 container_of(dev, struct srpt_device, dev);
2308 #endif
2309         struct srpt_port *sport;
2310         int i;
2311         int len = 0;
2312
2313         for (i = 0; i < sdev->device->phys_port_cnt; i++) {
2314                 sport = &sdev->port[i];
2315
2316                 len += sprintf(buf + len,
2317                                "tid_ext=%016llx,ioc_guid=%016llx,pkey=ffff,"
2318                                "dgid=%04x%04x%04x%04x%04x%04x%04x%04x,"
2319                                "service_id=%016llx\n",
2320                                (unsigned long long) mellanox_ioc_guid,
2321                                (unsigned long long) mellanox_ioc_guid,
2322                                be16_to_cpu(((__be16 *) sport->gid.raw)[0]),
2323                                be16_to_cpu(((__be16 *) sport->gid.raw)[1]),
2324                                be16_to_cpu(((__be16 *) sport->gid.raw)[2]),
2325                                be16_to_cpu(((__be16 *) sport->gid.raw)[3]),
2326                                be16_to_cpu(((__be16 *) sport->gid.raw)[4]),
2327                                be16_to_cpu(((__be16 *) sport->gid.raw)[5]),
2328                                be16_to_cpu(((__be16 *) sport->gid.raw)[6]),
2329                                be16_to_cpu(((__be16 *) sport->gid.raw)[7]),
2330                                (unsigned long long) mellanox_ioc_guid);
2331         }
2332
2333         return len;
2334 }
2335
2336 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2337 static CLASS_DEVICE_ATTR(login_info, S_IRUGO, show_login_info, NULL);
2338 #else
2339 static DEVICE_ATTR(login_info, S_IRUGO, show_login_info, NULL);
2340 #endif
2341
2342 /*
2343  * Callback function called by the InfiniBand core when either an InfiniBand
2344  * device has been added or during the ib_register_client() call for each
2345  * registered InfiniBand device.
2346  */
2347 static void srpt_add_one(struct ib_device *device)
2348 {
2349         struct srpt_device *sdev;
2350         struct srpt_port *sport;
2351         struct ib_srq_init_attr srq_attr;
2352         int i;
2353
2354         sdev = kzalloc(sizeof *sdev, GFP_KERNEL);
2355         if (!sdev)
2356                 return;
2357
2358         sdev->device = device;
2359
2360 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2361         sdev->class_dev.class = &srpt_class;
2362         sdev->class_dev.dev = device->dma_device;
2363         snprintf(sdev->class_dev.class_id, BUS_ID_SIZE,
2364                  "srpt-%s", device->name);
2365 #else
2366         sdev->dev.class = &srpt_class;
2367         sdev->dev.parent = device->dma_device;
2368 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30)
2369         snprintf(sdev->dev.bus_id, BUS_ID_SIZE, "srpt-%s", device->name);
2370 #else
2371         snprintf(sdev->init_name, sizeof(sdev->init_name),
2372                  "srpt-%s", device->name);
2373         sdev->dev.init_name = sdev->init_name;
2374 #endif
2375 #endif
2376
2377 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2378         if (class_device_register(&sdev->class_dev))
2379                 goto free_dev;
2380         if (class_device_create_file(&sdev->class_dev,
2381                                      &class_device_attr_login_info))
2382                 goto err_dev;
2383 #else
2384         if (device_register(&sdev->dev))
2385                 goto free_dev;
2386         if (device_create_file(&sdev->dev, &dev_attr_login_info))
2387                 goto err_dev;
2388 #endif
2389
2390         if (ib_query_device(device, &sdev->dev_attr))
2391                 goto err_dev;
2392
2393         sdev->pd = ib_alloc_pd(device);
2394         if (IS_ERR(sdev->pd))
2395                 goto err_dev;
2396
2397         sdev->mr = ib_get_dma_mr(sdev->pd, IB_ACCESS_LOCAL_WRITE);
2398         if (IS_ERR(sdev->mr))
2399                 goto err_pd;
2400
2401         srq_attr.event_handler = srpt_srq_event;
2402         srq_attr.srq_context = (void *)sdev;
2403         srq_attr.attr.max_wr = min(SRPT_SRQ_SIZE, sdev->dev_attr.max_srq_wr);
2404         srq_attr.attr.max_sge = 1;
2405         srq_attr.attr.srq_limit = 0;
2406
2407         sdev->srq = ib_create_srq(sdev->pd, &srq_attr);
2408         if (IS_ERR(sdev->srq))
2409                 goto err_mr;
2410
	printk(KERN_DEBUG PFX "%s: create SRQ #wr= %d max_allow=%d dev= %s\n",
	       __func__, srq_attr.attr.max_wr,
	       sdev->dev_attr.max_srq_wr, device->name);
2414
2415         if (!mellanox_ioc_guid)
2416                 mellanox_ioc_guid = be64_to_cpu(device->node_guid);
2417
2418         sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev);
2419         if (IS_ERR(sdev->cm_id))
2420                 goto err_srq;
2421
2422         /* print out target login information */
2423         printk(KERN_DEBUG PFX "Target login info: id_ext=%016llx,"
2424                 "ioc_guid=%016llx,pkey=ffff,service_id=%016llx\n",
2425                 (unsigned long long) mellanox_ioc_guid,
2426                 (unsigned long long) mellanox_ioc_guid,
2427                 (unsigned long long) mellanox_ioc_guid);
2428
	/*
	 * We do not have a consistent service_id (i.e. the id_ext part of
	 * the target_id) with which to identify this target. We currently
	 * use the GUID of the first HCA in the system as the service_id;
	 * hence the target_id will change if that HCA fails and is replaced
	 * by a different one.
	 */
2435         if (ib_cm_listen(sdev->cm_id, cpu_to_be64(mellanox_ioc_guid), 0, NULL))
2436                 goto err_cm;
2437
2438         INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device,
2439                               srpt_event_handler);
2440         if (ib_register_event_handler(&sdev->event_handler))
2441                 goto err_cm;
2442
2443         if (srpt_alloc_ioctx_ring(sdev))
2444                 goto err_event;
2445
2446         INIT_LIST_HEAD(&sdev->rch_list);
2447         spin_lock_init(&sdev->spinlock);
2448
2449         for (i = 0; i < SRPT_SRQ_SIZE; ++i)
2450                 srpt_post_recv(sdev, sdev->ioctx_ring[i]);
2451
2452         list_add_tail(&sdev->list, &srpt_devices);
2453
2454         ib_set_client_data(device, &srpt_client, sdev);
2455
2456         sdev->scst_tgt = scst_register(&srpt_template, NULL);
2457         if (!sdev->scst_tgt) {
2458                 printk(KERN_ERR PFX "SCST registration failed for %s.\n",
2459                         sdev->device->name);
2460                 goto err_ring;
2461         }
2462
2463         scst_tgt_set_tgt_priv(sdev->scst_tgt, sdev);
2464
2465         for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
2466                 sport = &sdev->port[i - 1];
2467                 sport->sdev = sdev;
2468                 sport->port = i;
2469 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2470                 INIT_WORK(&sport->work, srpt_refresh_port_work, sport);
2471 #else
2472                 INIT_WORK(&sport->work, srpt_refresh_port_work);
2473 #endif
2474                 if (srpt_refresh_port(sport)) {
2475                         printk(KERN_ERR PFX "MAD registration failed"
2476                                " for %s-%d.\n", sdev->device->name, i);
2477                         goto err_refresh_port;
2478                 }
2479         }
2480
2481         return;
2482
2483 err_refresh_port:
2484         scst_unregister(sdev->scst_tgt);
2485 err_ring:
2486         ib_set_client_data(device, &srpt_client, NULL);
2487         list_del(&sdev->list);
2488         srpt_free_ioctx_ring(sdev);
2489 err_event:
2490         ib_unregister_event_handler(&sdev->event_handler);
2491 err_cm:
2492         ib_destroy_cm_id(sdev->cm_id);
2493 err_srq:
2494         ib_destroy_srq(sdev->srq);
2495 err_mr:
2496         ib_dereg_mr(sdev->mr);
2497 err_pd:
2498         ib_dealloc_pd(sdev->pd);
2499 err_dev:
2500 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2501         class_device_unregister(&sdev->class_dev);
2502 #else
2503         device_unregister(&sdev->dev);
2504 #endif
2505 free_dev:
2506         kfree(sdev);
2507 }
2508
2509 /*
2510  * Callback function called by the InfiniBand core when either an InfiniBand
2511  * device has been removed or during the ib_unregister_client() call for each
2512  * registered InfiniBand device.
2513  */
2514 static void srpt_remove_one(struct ib_device *device)
2515 {
2516         struct srpt_device *sdev;
2517
2518         sdev = ib_get_client_data(device, &srpt_client);
2519 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
2520         WARN_ON(!sdev);
2521         if (!sdev)
2522                 return;
2523 #else
2524         if (WARN_ON(!sdev))
2525                 return;
2526 #endif
2527
2528         scst_unregister(sdev->scst_tgt);
2529         sdev->scst_tgt = NULL;
2530
2531         ib_unregister_event_handler(&sdev->event_handler);
2532         ib_destroy_cm_id(sdev->cm_id);
2533         ib_destroy_srq(sdev->srq);
2534         ib_dereg_mr(sdev->mr);
2535         ib_dealloc_pd(sdev->pd);
2536 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2537         class_device_unregister(&sdev->class_dev);
2538 #else
2539         device_unregister(&sdev->dev);
2540 #endif
2541
2542         srpt_free_ioctx_ring(sdev);
2543         list_del(&sdev->list);
2544         kfree(sdev);
2545 }
2546
2547 /*
2548  * Module initialization.
2549  *
2550  * Note: since ib_register_client() registers callback functions, and since at
2551  * least one of these callback functions (srpt_add_one()) calls SCST functions,
2552  * the SCST target template must be registered before ib_register_client() is
2553  * called.
2554  */
2555 static int __init srpt_init_module(void)
2556 {
2557         int ret;
2558
2559         INIT_LIST_HEAD(&srpt_devices);
2560
2561         ret = class_register(&srpt_class);
2562         if (ret) {
2563                 printk(KERN_ERR PFX "couldn't register class ib_srpt\n");
2564                 return ret;
2565         }
2566
2567         ret = scst_register_target_template(&srpt_template);
2568         if (ret < 0) {
2569                 printk(KERN_ERR PFX "couldn't register with scst\n");
2570                 ret = -ENODEV;
2571                 goto mem_out;
2572         }
2573
2574         ret = ib_register_client(&srpt_client);
2575         if (ret) {
2576                 printk(KERN_ERR PFX "couldn't register IB client\n");
2577                 goto scst_out;
2578         }
2579
2580         if (thread) {
2581                 spin_lock_init(&srpt_thread.thread_lock);
2582                 INIT_LIST_HEAD(&srpt_thread.thread_ioctx_list);
2583                 srpt_thread.thread = kthread_run(srpt_ioctx_thread,
2584                                                  NULL, "srpt_thread");
2585                 if (IS_ERR(srpt_thread.thread)) {
2586                         srpt_thread.thread = NULL;
2587                         thread = 0;
2588                 }
2589         }
2590
2591         return 0;
2592
2593 scst_out:
2594         scst_unregister_target_template(&srpt_template);
2595 mem_out:
2596         class_unregister(&srpt_class);
2597         return ret;
2598 }
2599
2600 static void __exit srpt_cleanup_module(void)
2601 {
2602         if (srpt_thread.thread)
2603                 kthread_stop(srpt_thread.thread);
2604         ib_unregister_client(&srpt_client);
2605         scst_unregister_target_template(&srpt_template);
2606         class_unregister(&srpt_class);
2607 }
2608
2609 module_init(srpt_init_module);
2610 module_exit(srpt_cleanup_module);