/*
 * Copyright (c) 2006 - 2009 Mellanox Technology Inc.  All rights reserved.
 * Copyright (C) 2008 Vladislav Bolkhovitin <vst@vlnb.net>
 * Copyright (C) 2008 - 2009 Bart Van Assche <bart.vanassche@gmail.com>
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/kthread.h>
#include <asm/atomic.h>
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#endif
#include "ib_srpt.h"
#include "scst_debug.h"

/* Name of this kernel module. */
#define DRV_NAME                "ib_srpt"
/* Prefix for printk() kernel messages. */
#define PFX                     DRV_NAME ": "
#define DRV_VERSION             "1.0.1"
#define DRV_RELDATE             "July 10, 2008"
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
/* Flags to be used in SCST debug tracing statements. */
#define DEFAULT_SRPT_TRACE_FLAGS (TRACE_OUT_OF_MEM | TRACE_MINOR \
                                  | TRACE_MGMT | TRACE_SPECIAL)
/* Name of the entry that will be created under /proc/scsi_tgt/ib_srpt. */
#define SRPT_PROC_TRACE_LEVEL_NAME      "trace_level"
#endif

#define MELLANOX_SRPT_ID_STRING "Mellanox OFED SRP target"

MODULE_AUTHOR("Vu Pham");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol target "
                   "v" DRV_VERSION " (" DRV_RELDATE ")");
MODULE_LICENSE("Dual BSD/GPL");

struct srpt_thread {
        /* Protects thread_ioctx_list. */
        spinlock_t thread_lock;
        /* I/O contexts to be processed by the kernel thread. */
        struct list_head thread_ioctx_list;
        /* SRPT kernel thread. */
        struct task_struct *thread;
};

/*
 * Global Variables
 */

static u64 mellanox_ioc_guid;
/* List of srpt_device structures. */
static struct list_head srpt_devices;
static int thread;
static struct srpt_thread srpt_thread;
static DECLARE_WAIT_QUEUE_HEAD(ioctx_list_waitQ);
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
static unsigned long trace_flag = DEFAULT_SRPT_TRACE_FLAGS;
module_param(trace_flag, long, 0644);
MODULE_PARM_DESC(trace_flag,
                 "Trace flags for the ib_srpt kernel module.");
#endif

module_param(thread, int, 0444);
MODULE_PARM_DESC(thread,
                 "Execute ioctx processing in kernel thread context instead"
                 " of soft IRQ context where possible. Default is 0 (soft IRQ).");

static void srpt_add_one(struct ib_device *device);
static void srpt_remove_one(struct ib_device *device);
static int srpt_disconnect_channel(struct srpt_rdma_ch *ch, int dreq);
static void srpt_unregister_mad_agent(struct srpt_device *sdev);
static void srpt_unregister_procfs_entry(struct scst_tgt_template *tgt);

static struct ib_client srpt_client = {
        .name = DRV_NAME,
        .add = srpt_add_one,
        .remove = srpt_remove_one
};

/*
 * Callback function called by the InfiniBand core when an asynchronous IB
 * event occurs. This callback may occur in interrupt context. See also
 * section 11.5.2, Set Asynchronous Event Handler in the InfiniBand
 * Architecture Specification.
 */
static void srpt_event_handler(struct ib_event_handler *handler,
                               struct ib_event *event)
{
        struct srpt_device *sdev =
            ib_get_client_data(event->device, &srpt_client);
        struct srpt_port *sport;

        if (!sdev || sdev->device != event->device)
                return;

        printk(KERN_WARNING PFX "ASYNC event= %d on device= %s\n",
                event->event, sdev->device->name);

        switch (event->event) {
        case IB_EVENT_PORT_ERR:
                if (event->element.port_num <= sdev->device->phys_port_cnt) {
                        sport = &sdev->port[event->element.port_num - 1];
                        sport->lid = 0;
                        sport->sm_lid = 0;
                }
                break;
        case IB_EVENT_PORT_ACTIVE:
        case IB_EVENT_LID_CHANGE:
        case IB_EVENT_PKEY_CHANGE:
        case IB_EVENT_SM_CHANGE:
        case IB_EVENT_CLIENT_REREGISTER:
                /*
                 * Refresh port data asynchronously. Note: it is safe to call
                 * schedule_work() even if &sport->work is already on the
                 * global workqueue because schedule_work() tests for the
                 * work_pending() condition before adding &sport->work to the
                 * global work queue.
                 */
                if (event->element.port_num <= sdev->device->phys_port_cnt) {
                        sport = &sdev->port[event->element.port_num - 1];
                        if (!sport->lid && !sport->sm_lid)
                                schedule_work(&sport->work);
                }
                break;
        default:
                break;
        }
}

/*
 * Callback function called by the InfiniBand core for SRQ (shared receive
 * queue) events.
 */
static void srpt_srq_event(struct ib_event *event, void *ctx)
{
        printk(KERN_WARNING PFX "SRQ event %d\n", event->event);
}

/*
 * Callback function called by the InfiniBand core for QP (queue pair) events.
 */
static void srpt_qp_event(struct ib_event *event, void *ctx)
{
        struct srpt_rdma_ch *ch = ctx;

        printk(KERN_WARNING PFX
               "QP event %d on cm_id=%p sess_name=%s state=%d\n",
               event->event, ch->cm_id, ch->sess_name, ch->state);

        switch (event->event) {
        case IB_EVENT_COMM_EST:
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20) || defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
                ib_cm_notify(ch->cm_id, event->event);
#else
                /* Vanilla 2.6.19 kernel (or before) without OFED. */
                printk(KERN_ERR PFX "how to perform ib_cm_notify() on a"
                        " vanilla 2.6.18 kernel ???\n");
#endif
                break;
        case IB_EVENT_QP_LAST_WQE_REACHED:
                if (ch->state == RDMA_CHANNEL_LIVE) {
                        printk(KERN_WARNING PFX
                               "Schedule CM_DISCONNECT_WORK\n");
                        srpt_disconnect_channel(ch, 1);
                }
                break;
        default:
                break;
        }
}

/*
 * Helper function for filling in an InfiniBand IOUnitInfo structure: copies
 * the lowest four bits of 'value' into element 'slot' of the array of
 * four-bit elements 'c_list' (the controller list). The index 'slot' is
 * one-based.
 *
 * @pre 1 <= slot && 0 <= value && value < 16
 */
static void srpt_set_ioc(u8 *c_list, u32 slot, u8 value)
{
        u16 id;
        u8 tmp;

        id = (slot - 1) / 2;
        if (slot & 0x1) {
                tmp = c_list[id] & 0xf;
                c_list[id] = (value << 4) | tmp;
        } else {
                tmp = c_list[id] & 0xf0;
                c_list[id] = (value & 0xf) | tmp;
        }
}
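
/*
 * Example of the nibble packing performed by srpt_set_ioc(), assuming a
 * zero-initialized controller list (illustrative only):
 *
 *   srpt_set_ioc(c_list, 1, 1);  - slot 1 is the high nibble of byte 0,
 *                                  so c_list[0] becomes 0x10.
 *   srpt_set_ioc(c_list, 2, 0);  - slot 2 is the low nibble of byte 0,
 *                                  so c_list[0] stays 0x10.
 *   srpt_set_ioc(c_list, 3, 5);  - slot 3 is the high nibble of byte 1,
 *                                  so c_list[1] becomes 0x50.
 *
 * A fabric manager reading IOUnitInfo thus sees slot 1 marked present (0x1)
 * and all other slots empty (0x0); see srpt_get_iou() below.
 */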

/*
 * Write InfiniBand ClassPortInfo to mad. See also section 16.3.3.1
 * ClassPortInfo in the InfiniBand Architecture Specification.
 */
static void srpt_get_class_port_info(struct ib_dm_mad *mad)
{
        struct ib_class_port_info *cif;

        cif = (struct ib_class_port_info *)mad->data;
        memset(cif, 0, sizeof *cif);
        cif->base_version = 1;
        cif->class_version = 1;
        cif->resp_time_value = 20;

        mad->mad_hdr.status = 0;
}

/*
 * Write IOUnitInfo to mad. See also section 16.3.3.3 IOUnitInfo in the
 * InfiniBand Architecture Specification. See also section B.7,
 * table B.6 in the T10 SRP r16a document.
 */
static void srpt_get_iou(struct ib_dm_mad *mad)
{
        struct ib_dm_iou_info *ioui;
        u8 slot;
        int i;

        ioui = (struct ib_dm_iou_info *)mad->data;
        ioui->change_id = 1;
        ioui->max_controllers = 16;

        /* set present for slot 1 and empty for the rest */
        srpt_set_ioc(ioui->controller_list, 1, 1);
        for (i = 1, slot = 2; i < 16; i++, slot++)
                srpt_set_ioc(ioui->controller_list, slot, 0);

        mad->mad_hdr.status = 0;
}

/*
 * Write IOControllerProfile to mad for I/O controller (sdev, slot). See also
 * section 16.3.3.4 IOControllerProfile in the InfiniBand Architecture
 * Specification. See also section B.7, table B.7 in the T10 SRP r16a
 * document.
 */
static void srpt_get_ioc(struct srpt_device *sdev, u32 slot,
                         struct ib_dm_mad *mad)
{
        struct ib_dm_ioc_profile *iocp;

        iocp = (struct ib_dm_ioc_profile *)mad->data;

        if (!slot || slot > 16) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
                return;
        }

        if (slot > 2) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
                return;
        }

        memset(iocp, 0, sizeof *iocp);
        strcpy(iocp->id_string, MELLANOX_SRPT_ID_STRING);
        iocp->guid = cpu_to_be64(mellanox_ioc_guid);
        iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
        iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id);
        iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver);
        iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
        iocp->subsys_device_id = 0x0;
        iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
        iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
        iocp->protocol = cpu_to_be16(SRP_PROTOCOL);
        iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION);
        iocp->send_queue_depth = cpu_to_be16(SRPT_SRQ_SIZE);
        iocp->rdma_read_depth = 4;
        iocp->send_size = cpu_to_be32(MAX_MESSAGE_SIZE);
        iocp->rdma_size = cpu_to_be32(MAX_RDMA_SIZE);
        iocp->num_svc_entries = 1;
        iocp->op_cap_mask = SRP_SEND_TO_IOC | SRP_SEND_FROM_IOC |
            SRP_RDMA_READ_FROM_IOC | SRP_RDMA_WRITE_FROM_IOC;

        mad->mad_hdr.status = 0;
}

/*
 * Device management: write ServiceEntries to mad for the given slot. See also
 * section 16.3.3.5 ServiceEntries in the InfiniBand Architecture
 * Specification. See also section B.7, table B.8 in the T10 SRP r16a document.
 */
static void srpt_get_svc_entries(u16 slot, u8 hi, u8 lo, struct ib_dm_mad *mad)
{
        struct ib_dm_svc_entries *svc_entries;

        if (!slot || slot > 16) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
                return;
        }

        if (slot > 2 || lo > hi || hi > 1) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
                return;
        }

        svc_entries = (struct ib_dm_svc_entries *)mad->data;
        memset(svc_entries, 0, sizeof *svc_entries);
        svc_entries->service_entries[0].id = cpu_to_be64(mellanox_ioc_guid);
        snprintf(svc_entries->service_entries[0].name,
                 sizeof(svc_entries->service_entries[0].name),
                 "%s%016llx",
                 SRP_SERVICE_NAME_PREFIX,
                 (unsigned long long)mellanox_ioc_guid);

        mad->mad_hdr.status = 0;
}

/*
 * Actual processing of a received MAD *rq_mad received through source port *sp
 * (MAD = InfiniBand management datagram). The response to be sent back is
 * written to *rsp_mad.
 */
static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
                                 struct ib_dm_mad *rsp_mad)
{
        u16 attr_id;
        u32 slot;
        u8 hi, lo;

        attr_id = be16_to_cpu(rq_mad->mad_hdr.attr_id);
        switch (attr_id) {
        case DM_ATTR_CLASS_PORT_INFO:
                srpt_get_class_port_info(rsp_mad);
                break;
        case DM_ATTR_IOU_INFO:
                srpt_get_iou(rsp_mad);
                break;
        case DM_ATTR_IOC_PROFILE:
                slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
                srpt_get_ioc(sp->sdev, slot, rsp_mad);
                break;
        case DM_ATTR_SVC_ENTRIES:
                slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
                hi = (u8) ((slot >> 8) & 0xff);
                lo = (u8) (slot & 0xff);
                slot = (u16) ((slot >> 16) & 0xffff);
                srpt_get_svc_entries(slot, hi, lo, rsp_mad);
                break;
        default:
                rsp_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
                break;
        }
}
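
/*
 * Sketch of the attr_mod layout decoded above for DM_ATTR_SVC_ENTRIES
 * (derived from this code, not quoted from the IB specification):
 *
 *   bits 31..16: I/O controller slot
 *   bits 15..8:  index of the highest requested service entry ('hi')
 *   bits 7..0:   index of the lowest requested service entry ('lo')
 *
 * For example, attr_mod == 0x00010000 requests service entry 0 of the
 * controller in slot 1. srpt_get_svc_entries() only accepts slots 1 and 2
 * and entry ranges within 0..1, since this target reports a single I/O
 * controller with num_svc_entries == 1.
 */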

/*
 * Callback function that is called by the InfiniBand core after transmission of
 * a MAD. (MAD = management datagram; AH = address handle.)
 */
static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
                                  struct ib_mad_send_wc *mad_wc)
{
        ib_destroy_ah(mad_wc->send_buf->ah);
        ib_free_send_mad(mad_wc->send_buf);
}

/*
 * Callback function that is called by the InfiniBand core after reception of
 * a MAD (management datagram).
 */
static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
                                  struct ib_mad_recv_wc *mad_wc)
{
        struct srpt_port *sport = (struct srpt_port *)mad_agent->context;
        struct ib_ah *ah;
        struct ib_mad_send_buf *rsp;
        struct ib_dm_mad *dm_mad;

        if (!mad_wc || !mad_wc->recv_buf.mad)
                return;

        ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc,
                                  mad_wc->recv_buf.grh, mad_agent->port_num);
        if (IS_ERR(ah))
                goto err;

        BUILD_BUG_ON(offsetof(struct ib_dm_mad, data) != IB_MGMT_DEVICE_HDR);

        rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp,
                                 mad_wc->wc->pkey_index, 0,
                                 IB_MGMT_DEVICE_HDR, IB_MGMT_DEVICE_DATA,
                                 GFP_KERNEL);
        if (IS_ERR(rsp))
                goto err_rsp;

        rsp->ah = ah;

        dm_mad = rsp->mad;
        memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof *dm_mad);
        dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
        dm_mad->mad_hdr.status = 0;

        switch (mad_wc->recv_buf.mad->mad_hdr.method) {
        case IB_MGMT_METHOD_GET:
                srpt_mgmt_method_get(sport, mad_wc->recv_buf.mad, dm_mad);
                break;
        case IB_MGMT_METHOD_SET:
                dm_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
                break;
        default:
                dm_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD);
                break;
        }

        if (!ib_post_send_mad(rsp, NULL)) {
                ib_free_recv_mad(mad_wc);
                /* will destroy_ah & free_send_mad in send completion */
                return;
        }

        ib_free_send_mad(rsp);

err_rsp:
        ib_destroy_ah(ah);
err:
        ib_free_recv_mad(mad_wc);
}

/*
 * Enable InfiniBand management datagram processing, update the cached sm_lid,
 * lid and gid values, and register a callback function for processing MADs
 * on the specified port. It is safe to call this function more than once for
 * the same port.
 */
static int srpt_refresh_port(struct srpt_port *sport)
{
        struct ib_mad_reg_req reg_req;
        struct ib_port_modify port_modify;
        struct ib_port_attr port_attr;
        int ret;

        memset(&port_modify, 0, sizeof port_modify);
        port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
        port_modify.clr_port_cap_mask = 0;

        ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
        if (ret)
                goto err_mod_port;

        ret = ib_query_port(sport->sdev->device, sport->port, &port_attr);
        if (ret)
                goto err_query_port;

        sport->sm_lid = port_attr.sm_lid;
        sport->lid = port_attr.lid;

        ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid);
        if (ret)
                goto err_query_port;

        if (!sport->mad_agent) {
                memset(&reg_req, 0, sizeof reg_req);
                reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
                reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION;
                set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
                set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);

                sport->mad_agent = ib_register_mad_agent(sport->sdev->device,
                                                         sport->port,
                                                         IB_QPT_GSI,
                                                         &reg_req, 0,
                                                         srpt_mad_send_handler,
                                                         srpt_mad_recv_handler,
                                                         sport);
                if (IS_ERR(sport->mad_agent)) {
                        ret = PTR_ERR(sport->mad_agent);
                        sport->mad_agent = NULL;
                        goto err_query_port;
                }
        }

        return 0;

err_query_port:

        port_modify.set_port_cap_mask = 0;
        port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
        ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);

err_mod_port:

        return ret;
}

/*
 * Unregister the callback function for processing MADs and disable MAD
 * processing for all ports of the specified device. It is safe to call this
 * function more than once for the same device.
 */
static void srpt_unregister_mad_agent(struct srpt_device *sdev)
{
        struct ib_port_modify port_modify = {
                .clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP,
        };
        struct srpt_port *sport;
        int i;

        for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
                sport = &sdev->port[i - 1];
                WARN_ON(sport->port != i);
                if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0)
                        printk(KERN_ERR PFX "disabling MAD processing"
                               " failed.\n");
                if (sport->mad_agent) {
                        ib_unregister_mad_agent(sport->mad_agent);
                        sport->mad_agent = NULL;
                }
        }
}

/*
 * Allocate and initialize an SRPT I/O context structure.
 */
static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev)
{
        struct srpt_ioctx *ioctx;

        ioctx = kmalloc(sizeof *ioctx, GFP_KERNEL);
        if (!ioctx)
                goto out;

        ioctx->buf = kzalloc(MAX_MESSAGE_SIZE, GFP_KERNEL);
        if (!ioctx->buf)
                goto out_free_ioctx;

        ioctx->dma = dma_map_single(sdev->device->dma_device, ioctx->buf,
                                    MAX_MESSAGE_SIZE, DMA_BIDIRECTIONAL);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)
        if (dma_mapping_error(sdev->device->dma_device, ioctx->dma))
#else
        if (dma_mapping_error(ioctx->dma))
#endif
                goto out_free_buf;

        return ioctx;

out_free_buf:
        kfree(ioctx->buf);
out_free_ioctx:
        kfree(ioctx);
out:
        return NULL;
}

/*
 * Deallocate an SRPT I/O context structure.
 */
static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
{
        if (!ioctx)
                return;

        dma_unmap_single(sdev->device->dma_device, ioctx->dma,
                         MAX_MESSAGE_SIZE, DMA_BIDIRECTIONAL);
        kfree(ioctx->buf);
        kfree(ioctx);
}

/*
 * Associate a ring of SRPT I/O context structures with the specified device.
 */
static int srpt_alloc_ioctx_ring(struct srpt_device *sdev)
{
        int i;

        for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
                sdev->ioctx_ring[i] = srpt_alloc_ioctx(sdev);

                if (!sdev->ioctx_ring[i])
                        goto err;

                sdev->ioctx_ring[i]->index = i;
        }

        return 0;

err:
        /* Entries 0 .. i-1 have been allocated; free all of them. */
        while (--i >= 0) {
                srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
                sdev->ioctx_ring[i] = NULL;
        }
        return -ENOMEM;
}

/* Free the ring of SRPT I/O context structures. */
static void srpt_free_ioctx_ring(struct srpt_device *sdev)
{
        int i;

        for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
                srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
                sdev->ioctx_ring[i] = NULL;
        }
}

/*
 * Post a receive request on the SRQ (shared receive queue) of InfiniBand
 * device 'sdev'.
 */
static int srpt_post_recv(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
{
        struct ib_sge list;
        struct ib_recv_wr wr, *bad_wr;

        wr.wr_id = ioctx->index | SRPT_OP_RECV;

        list.addr = ioctx->dma;
        list.length = MAX_MESSAGE_SIZE;
        list.lkey = sdev->mr->lkey;

        wr.next = NULL;
        wr.sg_list = &list;
        wr.num_sge = 1;

        return ib_post_srq_recv(sdev->srq, &wr, &bad_wr);
}
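
/*
 * Note on work request IDs (explanatory, based on the code above and on
 * srpt_completion() below): receive and send completions are reported
 * through the same completion queue, so receive work requests are tagged
 * by OR-ing the SRPT_OP_RECV flag into wr_id. A completion handler can
 * then recover the ring index with
 *
 *   ioctx = sdev->ioctx_ring[wc.wr_id & ~SRPT_OP_RECV];
 *
 * while send and RDMA completions carry the bare index in wr_id. This
 * encoding assumes that no valid ring index ever has the SRPT_OP_RECV
 * bit set.
 */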

/*
 * Post a send request on the SRPT RDMA channel 'ch'.
 */
static int srpt_post_send(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
                          int len)
{
        struct ib_sge list;
        struct ib_send_wr wr, *bad_wr;
        struct srpt_device *sdev = ch->sport->sdev;

        dma_sync_single_for_device(sdev->device->dma_device, ioctx->dma,
                                   MAX_MESSAGE_SIZE, DMA_TO_DEVICE);

        list.addr = ioctx->dma;
        list.length = len;
        list.lkey = sdev->mr->lkey;

        wr.next = NULL;
        wr.wr_id = ioctx->index;
        wr.sg_list = &list;
        wr.num_sge = 1;
        wr.opcode = IB_WR_SEND;
        wr.send_flags = IB_SEND_SIGNALED;

        return ib_post_send(ch->qp, &wr, &bad_wr);
}

static int srpt_get_desc_tbl(struct srpt_ioctx *ioctx, struct srp_cmd *srp_cmd,
                             int *ind)
{
        struct srp_indirect_buf *idb;
        struct srp_direct_buf *db;

        *ind = 0;
        if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) ||
            ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) {
                ioctx->n_rbuf = 1;
                ioctx->rbufs = &ioctx->single_rbuf;

                db = (void *)srp_cmd->add_data;
                memcpy(ioctx->rbufs, db, sizeof *db);
                ioctx->data_len = be32_to_cpu(db->len);
        } else {
                idb = (void *)srp_cmd->add_data;

                ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof *db;

                if (ioctx->n_rbuf >
                    (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
                        *ind = 1;
                        ioctx->n_rbuf = 0;
                        goto out;
                }

                if (ioctx->n_rbuf == 1)
                        ioctx->rbufs = &ioctx->single_rbuf;
                else
                        ioctx->rbufs =
                                kmalloc(ioctx->n_rbuf * sizeof *db, GFP_ATOMIC);
                if (!ioctx->rbufs) {
                        ioctx->n_rbuf = 0;
                        return -ENOMEM;
                }

                db = idb->desc_list;
                memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof *db);
                ioctx->data_len = be32_to_cpu(idb->len);
        }
out:
        return 0;
}
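
/*
 * Sketch of the SRP data buffer descriptor handling above (a summary of
 * this code, not of the T10 SRP specification text): srp_cmd->buf_fmt
 * carries the data-out format in its high nibble and the data-in format
 * in its low nibble. With SRP_DATA_DESC_DIRECT, add_data holds a single
 * srp_direct_buf describing one remote buffer. Otherwise an
 * srp_indirect_buf is assumed, whose desc_list holds
 * table_desc.len / sizeof(struct srp_direct_buf) descriptors; *ind is set
 * when that count exceeds data_out_desc_cnt + data_in_desc_cnt, i.e. when
 * the descriptor list is not contained in the IU itself.
 */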

/*
 * Modify the attributes of queue pair 'qp': allow local write, remote read,
 * and remote write. Also transition 'qp' to state IB_QPS_INIT.
 */
static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
{
        struct ib_qp_attr *attr;
        int ret;

        attr = kzalloc(sizeof *attr, GFP_KERNEL);
        if (!attr)
                return -ENOMEM;

        attr->qp_state = IB_QPS_INIT;
        attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
            IB_ACCESS_REMOTE_WRITE;
        attr->port_num = ch->sport->port;
        attr->pkey_index = 0;

        ret = ib_modify_qp(qp, attr,
                           IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PORT |
                           IB_QP_PKEY_INDEX);

        kfree(attr);
        return ret;
}

static int srpt_ch_qp_rtr_rts(struct srpt_rdma_ch *ch, struct ib_qp *qp,
                              enum ib_qp_state qp_state)
{
        struct ib_qp_attr *qp_attr;
        int attr_mask;
        int ret;

        qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
        if (!qp_attr)
                return -ENOMEM;

        qp_attr->qp_state = qp_state;
        ret = ib_cm_init_qp_attr(ch->cm_id, qp_attr, &attr_mask);
        if (ret)
                goto out;

        if (qp_state == IB_QPS_RTR)
                qp_attr->max_dest_rd_atomic = 4;
        else
                qp_attr->max_rd_atomic = 4;

        ret = ib_modify_qp(qp, qp_attr, attr_mask);

out:
        kfree(qp_attr);
        return ret;
}
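
/*
 * Note on the queue pair state machine (background, not original driver
 * documentation): a newly created RC queue pair starts in IB_QPS_RESET.
 * srpt_init_ch_qp() moves it to IB_QPS_INIT when the channel is created;
 * srpt_ch_qp_rtr_rts() is then expected to be called with IB_QPS_RTR
 * (ready to receive) while accepting a login and with IB_QPS_RTS (ready
 * to send) once the connection is established. ib_cm_init_qp_attr() fills
 * in the path and timeout attributes needed for each transition, so only
 * the RDMA read/atomic depth (hard-coded to 4, matching the advertised
 * iocp->rdma_read_depth) is set by hand.
 */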

static void srpt_reset_ioctx(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
{
        int i;

        if (ioctx->n_rdma_ius > 0 && ioctx->rdma_ius) {
                struct rdma_iu *riu = ioctx->rdma_ius;

                for (i = 0; i < ioctx->n_rdma_ius; ++i, ++riu)
                        kfree(riu->sge);
                kfree(ioctx->rdma_ius);
        }

        if (ioctx->n_rbuf > 1)
                kfree(ioctx->rbufs);

        if (srpt_post_recv(ch->sport->sdev, ioctx))
                printk(KERN_ERR PFX "SRQ post_recv failed - this is serious\n");
                /* we should queue it back to free_ioctx queue */
        else
                atomic_inc(&ch->req_lim_delta);
}

static void srpt_abort_scst_cmd(struct srpt_device *sdev,
                                struct scst_cmd *scmnd,
                                bool tell_initiator)
{
        scst_data_direction dir;

        dir = scst_cmd_get_data_direction(scmnd);
        if (dir != SCST_DATA_NONE) {
                dma_unmap_sg(sdev->device->dma_device,
                             scst_cmd_get_sg(scmnd),
                             scst_cmd_get_sg_cnt(scmnd),
                             scst_to_tgt_dma_dir(dir));

                if (scmnd->state == SCST_CMD_STATE_DATA_WAIT)
                        scst_rx_data(scmnd,
                                     tell_initiator ? SCST_RX_STATUS_ERROR
                                     : SCST_RX_STATUS_ERROR_FATAL,
                                     SCST_CONTEXT_THREAD);
                else if (scmnd->state == SCST_CMD_STATE_XMIT_WAIT)
                        ;
        }

        scst_set_delivery_status(scmnd, SCST_CMD_DELIVERY_FAILED);
        scst_tgt_cmd_done(scmnd, scst_estimate_context());
}

static void srpt_handle_err_comp(struct srpt_rdma_ch *ch, struct ib_wc *wc)
{
        struct srpt_ioctx *ioctx;
        struct srpt_device *sdev = ch->sport->sdev;

        if (wc->wr_id & SRPT_OP_RECV) {
                ioctx = sdev->ioctx_ring[wc->wr_id & ~SRPT_OP_RECV];
                printk(KERN_ERR PFX "This is serious - SRQ is in bad state\n");
        } else {
                ioctx = sdev->ioctx_ring[wc->wr_id];

                if (ioctx->scmnd)
                        srpt_abort_scst_cmd(sdev, ioctx->scmnd, true);
                else
                        srpt_reset_ioctx(ch, ioctx);
        }
}

static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
                                  struct srpt_ioctx *ioctx,
                                  enum scst_exec_context context)
{
        if (ioctx->scmnd) {
                scst_data_direction dir =
                        scst_cmd_get_data_direction(ioctx->scmnd);

                if (dir != SCST_DATA_NONE)
                        dma_unmap_sg(ch->sport->sdev->device->dma_device,
                                     scst_cmd_get_sg(ioctx->scmnd),
                                     scst_cmd_get_sg_cnt(ioctx->scmnd),
                                     scst_to_tgt_dma_dir(dir));

                scst_tgt_cmd_done(ioctx->scmnd, context);
        } else
                srpt_reset_ioctx(ch, ioctx);
}

static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
                                  struct srpt_ioctx *ioctx)
{
        if (!ioctx->scmnd) {
                srpt_reset_ioctx(ch, ioctx);
                return;
        }

        if (scst_cmd_get_data_direction(ioctx->scmnd) == SCST_DATA_WRITE)
                scst_rx_data(ioctx->scmnd, SCST_RX_STATUS_SUCCESS,
                        scst_estimate_context());
}

static void srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
                               struct srpt_ioctx *ioctx, u8 s_key, u8 s_code,
                               u64 tag)
{
        struct srp_rsp *srp_rsp;
        struct sense_data *sense;
        int limit_delta;

        srp_rsp = ioctx->buf;
        memset(srp_rsp, 0, sizeof *srp_rsp);

        limit_delta = atomic_read(&ch->req_lim_delta);
        atomic_sub(limit_delta, &ch->req_lim_delta);

        srp_rsp->opcode = SRP_RSP;
        srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
        srp_rsp->tag = tag;

        if (s_key != NO_SENSE) {
                srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
                srp_rsp->status = SAM_STAT_CHECK_CONDITION;
                srp_rsp->sense_data_len =
                    cpu_to_be32(sizeof *sense + (sizeof *sense % 4));

                sense = (struct sense_data *)(srp_rsp + 1);
                sense->err_code = 0x70;
                sense->key = s_key;
                sense->asc_ascq = s_code;
        }
}
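
/*
 * Note on SRP flow control (a summary of this code): ch->req_lim_delta
 * implements the SRP credit scheme. Each receive buffer successfully
 * reposted by srpt_reset_ioctx() increments the counter, and every
 * response drains the accumulated count into srp_rsp->req_lim_delta,
 * telling the initiator how many additional request IUs it may send.
 * For instance, if eight receives were reposted since the last response,
 * the next SRP_RSP would carry req_lim_delta == 8, modulo requests
 * completing concurrently, since the atomic_read()/atomic_sub() pair
 * above is not a single atomic operation.
 */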

static void srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
                                   struct srpt_ioctx *ioctx, u8 rsp_code,
                                   u64 tag)
{
        struct srp_rsp *srp_rsp;
        int limit_delta;

        dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
                                MAX_MESSAGE_SIZE, DMA_TO_DEVICE);

        srp_rsp = ioctx->buf;
        memset(srp_rsp, 0, sizeof *srp_rsp);

        limit_delta = atomic_read(&ch->req_lim_delta);
        atomic_sub(limit_delta, &ch->req_lim_delta);

        srp_rsp->opcode = SRP_RSP;
        srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
        srp_rsp->tag = tag;

        if (rsp_code != SRP_TSK_MGMT_SUCCESS) {
                srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID;
                srp_rsp->resp_data_len = cpu_to_be32(4);
                srp_rsp->data[3] = rsp_code;
        }
}

/*
 * Process SRP_CMD.
 */
static int srpt_handle_cmd(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
{
        struct scst_cmd *scmnd = NULL;
        struct srp_cmd *srp_cmd = NULL;
        scst_data_direction dir = SCST_DATA_NONE;
        int indirect_desc = 0;
        int ret;
        unsigned long flags;

        srp_cmd = ioctx->buf;

        if (srp_cmd->buf_fmt) {
                ret = srpt_get_desc_tbl(ioctx, srp_cmd, &indirect_desc);
                if (ret) {
                        srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
                                           NO_ADD_SENSE, srp_cmd->tag);
                        ((struct srp_rsp *)ioctx->buf)->status =
                                        SAM_STAT_TASK_SET_FULL;
                        goto send_rsp;
                }

                if (indirect_desc) {
                        srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
                                           NO_ADD_SENSE, srp_cmd->tag);
                        ((struct srp_rsp *)ioctx->buf)->status =
                                        SAM_STAT_TASK_SET_FULL;
                        goto send_rsp;
                }

                if (srp_cmd->buf_fmt & 0xf)
                        dir = SCST_DATA_READ;
                else if (srp_cmd->buf_fmt >> 4)
                        dir = SCST_DATA_WRITE;
                else
                        dir = SCST_DATA_NONE;
        } else
                dir = SCST_DATA_NONE;

        scmnd = scst_rx_cmd(ch->scst_sess, (u8 *) &srp_cmd->lun,
                            sizeof srp_cmd->lun, srp_cmd->cdb, 16,
                            thread ? SCST_NON_ATOMIC : SCST_ATOMIC);
        if (!scmnd) {
                srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
                                   NO_ADD_SENSE, srp_cmd->tag);
                ((struct srp_rsp *)ioctx->buf)->status =
                        SAM_STAT_TASK_SET_FULL;
                goto send_rsp;
        }

        ioctx->scmnd = scmnd;

        switch (srp_cmd->task_attr) {
        case SRP_CMD_HEAD_OF_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_HEAD_OF_QUEUE;
                break;
        case SRP_CMD_ORDERED_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
                break;
        case SRP_CMD_SIMPLE_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_SIMPLE;
                break;
        case SRP_CMD_ACA:
                scmnd->queue_type = SCST_CMD_QUEUE_ACA;
                break;
        default:
                scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
                break;
        }

        scst_cmd_set_tag(scmnd, srp_cmd->tag);
        scst_cmd_set_tgt_priv(scmnd, ioctx);
        scst_cmd_set_expected(scmnd, dir, ioctx->data_len);

        spin_lock_irqsave(&ch->spinlock, flags);
        list_add_tail(&ioctx->scmnd_list, &ch->active_scmnd_list);
        ch->active_scmnd_cnt++;
        spin_unlock_irqrestore(&ch->spinlock, flags);

        scst_cmd_init_done(scmnd, scst_estimate_context());

        return 0;

send_rsp:
        return -1;
}

/*
 * Process SRP_TSK_MGMT. See also table 19 in the T10 SRP r16a document.
 */
static int srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
                                struct srpt_ioctx *ioctx)
{
        struct srp_tsk_mgmt *srp_tsk = NULL;
        struct srpt_mgmt_ioctx *mgmt_ioctx;
        int ret;

        srp_tsk = ioctx->buf;

        printk(KERN_WARNING PFX
               "recv_tsk_mgmt= %d for task_tag= %lld"
               " using tag= %lld cm_id= %p sess= %p\n",
               srp_tsk->tsk_mgmt_func,
               (unsigned long long) srp_tsk->task_tag,
               (unsigned long long) srp_tsk->tag,
               ch->cm_id, ch->scst_sess);

        mgmt_ioctx = kmalloc(sizeof *mgmt_ioctx, GFP_ATOMIC);
        if (!mgmt_ioctx) {
                srpt_build_tskmgmt_rsp(ch, ioctx, SRP_TSK_MGMT_FAILED,
                                       srp_tsk->tag);
                goto send_rsp;
        }

        mgmt_ioctx->ioctx = ioctx;
        mgmt_ioctx->ch = ch;
        mgmt_ioctx->tag = srp_tsk->tag;

        switch (srp_tsk->tsk_mgmt_func) {
        case SRP_TSK_ABORT_TASK:
                ret = scst_rx_mgmt_fn_tag(ch->scst_sess,
                                          SCST_ABORT_TASK,
                                          srp_tsk->task_tag,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_ABORT_TASK_SET:
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_ABORT_TASK_SET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_CLEAR_TASK_SET:
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_CLEAR_TASK_SET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
#if 0
        case SRP_TSK_LUN_RESET:
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_LUN_RESET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
#endif
        case SRP_TSK_CLEAR_ACA:
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_CLEAR_ACA,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        default:
                srpt_build_tskmgmt_rsp(ch, ioctx,
                                       SRP_TSK_MGMT_FUNC_NOT_SUPP,
                                       srp_tsk->tag);
                goto send_rsp;
        }
        return 0;

send_rsp:
        return -1;
}

static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
                               struct srpt_ioctx *ioctx)
{
        u8 op;
        unsigned long flags;

        if (ch->state != RDMA_CHANNEL_LIVE) {
                if (ch->state == RDMA_CHANNEL_CONNECTING) {
                        spin_lock_irqsave(&ch->spinlock, flags);
                        list_add_tail(&ioctx->wait_list, &ch->cmd_wait_list);
                        spin_unlock_irqrestore(&ch->spinlock, flags);
                } else
                        srpt_reset_ioctx(ch, ioctx);

                return;
        }

        dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
                                MAX_MESSAGE_SIZE, DMA_FROM_DEVICE);

        ioctx->data_len = 0;
        ioctx->n_rbuf = 0;
        ioctx->rbufs = NULL;
        ioctx->n_rdma = 0;
        ioctx->n_rdma_ius = 0;
        ioctx->rdma_ius = NULL;
        ioctx->scmnd = NULL;

        op = *(u8 *) ioctx->buf;
        switch (op) {
        case SRP_CMD:
                if (srpt_handle_cmd(ch, ioctx) < 0)
                        goto send_rsp;
                break;

        case SRP_TSK_MGMT:
                if (srpt_handle_tsk_mgmt(ch, ioctx) < 0)
                        goto send_rsp;
                break;

        case SRP_I_LOGOUT:
        case SRP_AER_REQ:
        default:
                srpt_build_cmd_rsp(ch, ioctx, ILLEGAL_REQUEST, INVALID_CDB,
                                   ((struct srp_cmd *)ioctx->buf)->tag);

                goto send_rsp;
        }

        dma_sync_single_for_device(ch->sport->sdev->device->dma_device,
                                   ioctx->dma, MAX_MESSAGE_SIZE,
                                   DMA_FROM_DEVICE);

        return;

send_rsp:
        if (ch->state != RDMA_CHANNEL_LIVE ||
            srpt_post_send(ch, ioctx,
                           sizeof(struct srp_rsp) +
                           be32_to_cpu(((struct srp_rsp *)ioctx->buf)->
                                       sense_data_len)))
                srpt_reset_ioctx(ch, ioctx);
}

/*
 * Returns true if the ioctx list is non-empty or if the ib_srpt kernel thread
 * should stop.
 * @pre thread != 0
 */
static inline int srpt_test_ioctx_list(void)
{
        int res = (!list_empty(&srpt_thread.thread_ioctx_list) ||
                   unlikely(kthread_should_stop()));
        return res;
}

/*
 * Add 'ioctx' to the tail of the ioctx list and wake up the kernel thread.
 *
 * @pre thread != 0
 */
static inline void srpt_schedule_thread(struct srpt_ioctx *ioctx)
{
        unsigned long flags;

        spin_lock_irqsave(&srpt_thread.thread_lock, flags);
        list_add_tail(&ioctx->comp_list, &srpt_thread.thread_ioctx_list);
        spin_unlock_irqrestore(&srpt_thread.thread_lock, flags);
        wake_up(&ioctx_list_waitQ);
}

/*
 * InfiniBand CQ (completion queue) completion callback: rearms completion
 * notification, drains the CQ and dispatches each work completion either to
 * the SRPT kernel thread or to the appropriate handler, depending on the
 * 'thread' module parameter.
 */
static void srpt_completion(struct ib_cq *cq, void *ctx)
{
        struct srpt_rdma_ch *ch = ctx;
        struct srpt_device *sdev = ch->sport->sdev;
        struct ib_wc wc;
        struct srpt_ioctx *ioctx;

        ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
        while (ib_poll_cq(ch->cq, 1, &wc) > 0) {
                if (wc.status) {
                        printk(KERN_ERR PFX "failed %s status= %d\n",
                               wc.wr_id & SRPT_OP_RECV ? "receive" : "send",
                               wc.status);
                        srpt_handle_err_comp(ch, &wc);
                        break;
                }

                if (wc.wr_id & SRPT_OP_RECV) {
                        ioctx = sdev->ioctx_ring[wc.wr_id & ~SRPT_OP_RECV];
                        if (thread) {
                                ioctx->ch = ch;
                                ioctx->op = IB_WC_RECV;
                                srpt_schedule_thread(ioctx);
                        } else
                                srpt_handle_new_iu(ch, ioctx);
                        continue;
                } else
                        ioctx = sdev->ioctx_ring[wc.wr_id];

                if (thread) {
                        ioctx->ch = ch;
                        ioctx->op = wc.opcode;
                        srpt_schedule_thread(ioctx);
                } else {
                        switch (wc.opcode) {
                        case IB_WC_SEND:
                                srpt_handle_send_comp(ch, ioctx,
                                        scst_estimate_context());
                                break;
                        case IB_WC_RDMA_WRITE:
                        case IB_WC_RDMA_READ:
                                srpt_handle_rdma_comp(ch, ioctx);
                                break;
                        default:
                                break;
                        }
                }
        }
}
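
/*
 * Note on execution context (a summary of this code): when the 'thread'
 * module parameter is non-zero, srpt_completion() only queues each ioctx
 * via srpt_schedule_thread() and the SRPT kernel thread performs the SRP
 * processing in process context. Otherwise the work is done directly in
 * the completion callback, which may run in soft IRQ context; this is why
 * calls into SCST from this path pass SCST_ATOMIC or
 * scst_estimate_context() instead of assuming a sleepable context.
 */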

/*
 * Create a completion queue and a queue pair for the specified RDMA channel.
 */
1268 static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
1269 {
1270         struct ib_qp_init_attr *qp_init;
1271         struct srpt_device *sdev = ch->sport->sdev;
1272         int cqe;
1273         int ret;
1274
1275         qp_init = kzalloc(sizeof *qp_init, GFP_KERNEL);
1276         if (!qp_init)
1277                 return -ENOMEM;
1278
1279         /* Create a completion queue (CQ). */
1280
1281         cqe = SRPT_RQ_SIZE + SRPT_SQ_SIZE - 1;
1282 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(RHEL_RELEASE_CODE)
1283         ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe);
1284 #else
1285         ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe, 0);
1286 #endif
1287         if (IS_ERR(ch->cq)) {
1288                 ret = PTR_ERR(ch->cq);
1289                 printk(KERN_ERR PFX "failed to create_cq cqe= %d ret= %d\n",
1290                         cqe, ret);
1291                 goto out;
1292         }
1293
1294         /* Request completion notification. */
1295
1296         ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
1297
1298         /* Create a queue pair (QP). */
1299
1300         qp_init->qp_context = (void *)ch;
1301         qp_init->event_handler = srpt_qp_event;
1302         qp_init->send_cq = ch->cq;
1303         qp_init->recv_cq = ch->cq;
1304         qp_init->srq = sdev->srq;
1305         qp_init->sq_sig_type = IB_SIGNAL_REQ_WR;
1306         qp_init->qp_type = IB_QPT_RC;
1307         qp_init->cap.max_send_wr = SRPT_SQ_SIZE;
1308         qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE;
1309
1310         ch->qp = ib_create_qp(sdev->pd, qp_init);
1311         if (IS_ERR(ch->qp)) {
1312                 ret = PTR_ERR(ch->qp);
1313                 ib_destroy_cq(ch->cq);
1314                 printk(KERN_ERR PFX "failed to create_qp ret= %d\n", ret);
1315                 goto out;
1316         }
1317
1318         printk(KERN_DEBUG PFX "%s: max_cqe= %d max_sge= %d cm_id= %p\n",
1319                __func__, ch->cq->cqe, qp_init->cap.max_send_sge,
1320                ch->cm_id);
1321
1322         /* Modify the attributes and the state of queue pair ch->qp. */
1323
1324         ret = srpt_init_ch_qp(ch, ch->qp);
1325         if (ret) {
1326                 ib_destroy_qp(ch->qp);
1327                 ib_destroy_cq(ch->cq);
1328                 goto out;
1329         }
1330
1331         atomic_set(&ch->req_lim_delta, SRPT_RQ_SIZE);
1332 out:
1333         kfree(qp_init);
1334         return ret;
1335 }
1336
1337 static struct srpt_rdma_ch *srpt_find_channel(struct ib_cm_id *cm_id)
1338 {
1339         struct srpt_device *sdev = cm_id->context;
1340         struct srpt_rdma_ch *ch, *tmp_ch;
1341
1342         spin_lock_irq(&sdev->spinlock);
1343         list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
1344                 if (ch->cm_id == cm_id) {
1345                         spin_unlock_irq(&sdev->spinlock);
1346                         return ch;
1347                 }
1348         }
1349
1350         spin_unlock_irq(&sdev->spinlock);
1351
1352         return NULL;
1353 }
1354
1355 /** Release all resources associated with the specified RDMA channel. */
1356 static int srpt_release_channel(struct srpt_rdma_ch *ch, int destroy_cmid)
1357 {
1358         TRACE_ENTRY();
1359
1360         spin_lock_irq(&ch->sport->sdev->spinlock);
1361         list_del(&ch->list);
1362         spin_unlock_irq(&ch->sport->sdev->spinlock);
1363
1364         if (ch->cm_id && destroy_cmid) {
1365                 printk(KERN_WARNING PFX
1366                        "%s: destroy cm_id= %p\n", __func__, ch->cm_id);
1367                 ib_destroy_cm_id(ch->cm_id);
1368                 ch->cm_id = NULL;
1369         }
1370
1371         ib_destroy_qp(ch->qp);
1372         ib_destroy_cq(ch->cq);
1373
1374         if (ch->scst_sess) {
1375                 struct srpt_ioctx *ioctx, *ioctx_tmp;
1376
1377                 printk(KERN_WARNING PFX
1378                        "%s: release sess= %p sess_name= %s active_cmd= %d\n",
1379                        __func__, ch->scst_sess, ch->sess_name,
1380                        ch->active_scmnd_cnt);
1381
1382                 spin_lock_irq(&ch->spinlock);
1383                 list_for_each_entry_safe(ioctx, ioctx_tmp,
1384                                          &ch->active_scmnd_list, scmnd_list) {
1385                         spin_unlock_irq(&ch->spinlock);
1386
1387                         if (ioctx->scmnd)
1388                                 srpt_abort_scst_cmd(ch->sport->sdev,
1389                                                     ioctx->scmnd, true);
1390
1391                         spin_lock_irq(&ch->spinlock);
1392                 }
1393                 WARN_ON(ch->active_scmnd_cnt != 0);
1394                 spin_unlock_irq(&ch->spinlock);
1395
1396                 scst_unregister_session(ch->scst_sess, 0, NULL);
1397                 ch->scst_sess = NULL;
1398         }
1399
1400         kfree(ch);
1401
1402         TRACE_EXIT_RES(!destroy_cmid);
1403
1404         return destroy_cmid ? 0 : 1;
1405 }
1406
1407 static int srpt_disconnect_channel(struct srpt_rdma_ch *ch, int dreq)
1408 {
1409         spin_lock_irq(&ch->spinlock);
1410         ch->state = RDMA_CHANNEL_DISCONNECTING;
1411         spin_unlock_irq(&ch->spinlock);
1412
1413         if (dreq)
1414                 ib_send_cm_dreq(ch->cm_id, NULL, 0);
1415         else
1416                 ib_send_cm_drep(ch->cm_id, NULL, 0);
1417
1418         return 0;
1419 }
1420
1421 static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
1422                             struct ib_cm_req_event_param *param,
1423                             void *private_data)
1424 {
1425         struct srpt_device *sdev = cm_id->context;
1426         struct srp_login_req *req;
1427         struct srp_login_rsp *rsp;
1428         struct srp_login_rej *rej;
1429         struct ib_cm_rep_param *rep_param;
1430         struct srpt_rdma_ch *ch, *tmp_ch;
1431         u32 it_iu_len;
1432         int ret = 0;
1433
1434         if (!sdev || !private_data)
1435                 return -EINVAL;
1436
1437         rsp = kzalloc(sizeof *rsp, GFP_KERNEL);
1438         rej = kzalloc(sizeof *rej, GFP_KERNEL);
1439         rep_param = kzalloc(sizeof *rep_param, GFP_KERNEL);
1440
1441         if (!rsp || !rej || !rep_param) {
1442                 ret = -ENOMEM;
1443                 goto out;
1444         }
1445
1446         req = (struct srp_login_req *)private_data;
1447
1448         it_iu_len = be32_to_cpu(req->req_it_iu_len);
1449
1450         printk(KERN_DEBUG PFX
1451                "Host login i_port_id=0x%llx:0x%llx t_port_id=0x%llx:0x%llx"
1452                " it_iu_len=%d\n",
1453                (unsigned long long)
1454                be64_to_cpu(*(u64 *)&req->initiator_port_id[0]),
1455                (unsigned long long)
1456                be64_to_cpu(*(u64 *)&req->initiator_port_id[8]),
1457                (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[0]),
1458                (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[8]),
1459                it_iu_len);
1460
1461         if (it_iu_len > MAX_MESSAGE_SIZE || it_iu_len < 64) {
1462                 rej->reason =
1463                     cpu_to_be32(SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
1464                 ret = -EINVAL;
1465                 printk(KERN_WARNING PFX
1466                        "Reject invalid it_iu_len=%d\n", it_iu_len);
1467                 goto reject;
1468         }
1469
1470         if ((req->req_flags & 0x3) == SRP_MULTICHAN_SINGLE) {
1471                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;
1472
1473                 spin_lock_irq(&sdev->spinlock);
1474
1475                 list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
1476                         if (!memcmp(ch->i_port_id, req->initiator_port_id, 16)
1477                             && !memcmp(ch->t_port_id, req->target_port_id, 16)
1478                             && param->port == ch->sport->port
1479                             && param->listen_id == ch->sport->sdev->cm_id
1480                             && ch->cm_id) {
1481                                 /* found an existing channel */
1482                                 printk(KERN_WARNING PFX
1483                                        "Found existing channel name= %s"
1484                                        " cm_id= %p state= %d\n",
1485                                        ch->sess_name, ch->cm_id, ch->state);
1486
1487                                 spin_unlock_irq(&sdev->spinlock);
1488
1489                                 rsp->rsp_flags =
1490                                     SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
1491
1492                                 if (ch->state == RDMA_CHANNEL_LIVE)
1493                                         srpt_disconnect_channel(ch, 1);
1494                                 else if (ch->state == RDMA_CHANNEL_CONNECTING) {
1495                                         ib_send_cm_rej(ch->cm_id,
1496                                                        IB_CM_REJ_NO_RESOURCES,
1497                                                        NULL, 0, NULL, 0);
1498                                         srpt_release_channel(ch, 1);
1499                                 }
1500
1501                                 spin_lock_irq(&sdev->spinlock);
1502                         }
1503                 }
1504
1505                 spin_unlock_irq(&sdev->spinlock);
1506
1507         } else
1508                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
1509
1510         if (((u64) (*(u64 *) req->target_port_id) !=
1511              cpu_to_be64(mellanox_ioc_guid)) ||
1512             ((u64) (*(u64 *) (req->target_port_id + 8)) !=
1513              cpu_to_be64(mellanox_ioc_guid))) {
1514                 rej->reason =
1515                     cpu_to_be32(SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
1516                 ret = -EINVAL;
1517                 printk(KERN_WARNING PFX "Reject invalid target_port_id\n");
1518                 goto reject;
1519         }
1520
1521         ch = kzalloc(sizeof *ch, GFP_KERNEL);
1522         if (!ch) {
1523                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1524                 printk(KERN_WARNING PFX "Rejecting login: failed to allocate rdma_ch\n");
1525                 ret = -ENOMEM;
1526                 goto reject;
1527         }
1528
1529         spin_lock_init(&ch->spinlock);
1530         memcpy(ch->i_port_id, req->initiator_port_id, 16);
1531         memcpy(ch->t_port_id, req->target_port_id, 16);
1532         ch->sport = &sdev->port[param->port - 1];
1533         ch->cm_id = cm_id;
1534         ch->state = RDMA_CHANNEL_CONNECTING;
1535         INIT_LIST_HEAD(&ch->cmd_wait_list);
1536         INIT_LIST_HEAD(&ch->active_scmnd_list);
1537
1538         ret = srpt_create_ch_ib(ch);
1539         if (ret) {
1540                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1541                 printk(KERN_WARNING PFX "Rejecting login: failed to create rdma_ch\n");
1542                 goto free_ch;
1543         }
1544
1545         ret = srpt_ch_qp_rtr_rts(ch, ch->qp, IB_QPS_RTR);
1546         if (ret) {
1547                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1548                 printk(KERN_WARNING PFX
1549                        "Rejecting login: failed to change QP state to RTR, ret=%d\n", ret);
1550                 goto destroy_ib;
1551         }
1552
1553         snprintf(ch->sess_name, sizeof(ch->sess_name),
1554                  "0x%016llx%016llx",
1555                  (unsigned long long)be64_to_cpu(*(u64 *)ch->i_port_id),
1556                  (unsigned long long)be64_to_cpu(*(u64 *)(ch->i_port_id + 8)));
1557
1558         TRACE_DBG("registering session %s", ch->sess_name);
1559
1560         BUG_ON(!sdev->scst_tgt);
1561         ch->scst_sess = scst_register_session(sdev->scst_tgt, 0, ch->sess_name,
1562                                   NULL, NULL);
1563         if (!ch->scst_sess) {
1564                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1565                 printk(KERN_WARNING PFX "Failed to create scst sess\n");
1566                 goto destroy_ib;
1567         }
1568
1569         spin_lock_irq(&sdev->spinlock);
1570         list_add_tail(&ch->list, &sdev->rch_list);
1571         spin_unlock_irq(&sdev->spinlock);
1572
1573         printk(KERN_DEBUG PFX "Establish connection sess=%p name=%s cm_id=%p\n",
1574                ch->scst_sess, ch->sess_name, ch->cm_id);
1575
1576         scst_sess_set_tgt_priv(ch->scst_sess, ch);
1577
1578         /* create srp_login_response */
1579         rsp->opcode = SRP_LOGIN_RSP;
1580         rsp->tag = req->tag;
1581         rsp->max_it_iu_len = req->req_it_iu_len;
1582         rsp->max_ti_iu_len = req->req_it_iu_len;
1583         rsp->buf_fmt =
1584             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
1585         rsp->req_lim_delta = cpu_to_be32(SRPT_RQ_SIZE);
1586         atomic_set(&ch->req_lim_delta, 0);
1587
1588         /* create cm reply */
1589         rep_param->qp_num = ch->qp->qp_num;
1590         rep_param->private_data = (void *)rsp;
1591         rep_param->private_data_len = sizeof *rsp;
1592         rep_param->rnr_retry_count = 7;
1593         rep_param->flow_control = 1;
1594         rep_param->failover_accepted = 0;
1595         rep_param->srq = 1;
1596         rep_param->responder_resources = 4;
1597         rep_param->initiator_depth = 4;
1598
1599         ret = ib_send_cm_rep(cm_id, rep_param);
1600         if (ret)
1601                 srpt_release_channel(ch, 0);
1602
1603         goto out;
1604
1605 destroy_ib:
1606         ib_destroy_qp(ch->qp);
1607         ib_destroy_cq(ch->cq);
1608
1609 free_ch:
1610         kfree(ch);
1611
1612 reject:
1613         rej->opcode = SRP_LOGIN_REJ;
1614         rej->tag = req->tag;
1615         rej->buf_fmt =
1616             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
1617
1618         ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
1619                              (void *)rej, sizeof *rej);
1620
1621 out:
1622         kfree(rep_param);
1623         kfree(rsp);
1624         kfree(rej);
1625
1626         return ret;
1627 }
1628
1629 static int srpt_find_and_release_channel(struct ib_cm_id *cm_id)
1630 {
1631         struct srpt_rdma_ch *ch;
1632
1633         ch = srpt_find_channel(cm_id);
1634         if (!ch)
1635                 return -EINVAL;
1636
1637         return srpt_release_channel(ch, 0);
1638 }
1639
1640 static int srpt_cm_rej_recv(struct ib_cm_id *cm_id)
1641 {
1642         printk(KERN_DEBUG PFX "%s: cm_id=%p\n", __func__, cm_id);
1643         return srpt_find_and_release_channel(cm_id);
1644 }
1645
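     /*
      * An RTU (ready to use) message has been received, i.e. the login
      * handshake is complete: transition the channel to the LIVE state,
      * move the QP to RTS and process any information units that arrived
      * while the channel was still connecting.
      */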
1646 static int srpt_cm_rtu_recv(struct ib_cm_id *cm_id)
1647 {
1648         struct srpt_rdma_ch *ch;
1649         int ret;
1650
1651         ch = srpt_find_channel(cm_id);
1652         if (!ch)
1653                 return -EINVAL;
1654
1655         if (ch->state == RDMA_CHANNEL_CONNECTING) {
1656                 struct srpt_ioctx *ioctx, *ioctx_tmp;
1657
1658                 spin_lock_irq(&ch->spinlock);
1659                 ch->state = RDMA_CHANNEL_LIVE;
1660                 spin_unlock_irq(&ch->spinlock);
1661                 ret = srpt_ch_qp_rtr_rts(ch, ch->qp, IB_QPS_RTS);
1662
1663                 list_for_each_entry_safe(ioctx, ioctx_tmp, &ch->cmd_wait_list,
1664                                          wait_list) {
1665                         list_del(&ioctx->wait_list);
1666                         srpt_handle_new_iu(ch, ioctx);
1667                 }
1668         } else if (ch->state == RDMA_CHANNEL_DISCONNECTING)
1669                 ret = -EAGAIN;
1670         else
1671                 ret = 0;
1672
1673         if (ret) {
1674                 printk(KERN_ERR PFX "cm_id=%p sess_name=%s state=%d\n",
1675                        cm_id, ch->sess_name, ch->state);
1676                 srpt_disconnect_channel(ch, 1);
1677         }
1678
1679         return ret;
1680 }
1681
1682 static int srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
1683 {
1684         printk(KERN_DEBUG PFX "%s: cm_id=%p\n", __func__, cm_id);
1685         return srpt_find_and_release_channel(cm_id);
1686 }
1687
1688 static int srpt_cm_rep_error(struct ib_cm_id *cm_id)
1689 {
1690         printk(KERN_DEBUG PFX "%s: cm_id=%p\n", __func__, cm_id);
1691         return srpt_find_and_release_channel(cm_id);
1692 }
1693
1694 static int srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
1695 {
1696         struct srpt_rdma_ch *ch;
1697         int ret = 0;
1698
1699         ch = srpt_find_channel(cm_id);
1700
1701         if (!ch)
1702                 return -EINVAL;
1703
1704         printk(KERN_DEBUG PFX "%s: cm_id= %p ch->state= %d\n",
1705                  __func__, cm_id, ch->state);
1706
1707         switch (ch->state) {
1708         case RDMA_CHANNEL_LIVE:
1709         case RDMA_CHANNEL_CONNECTING:
1710                 ret = srpt_disconnect_channel(ch, 0);
1711                 break;
1712         case RDMA_CHANNEL_DISCONNECTING:
1713         default:
1714                 break;
1715         }
1716
1717         return ret;
1718 }
1719
1720 static int srpt_cm_drep_recv(struct ib_cm_id *cm_id)
1721 {
1722         printk(KERN_DEBUG PFX "%s: cm_id=%p\n", __func__, cm_id);
1723         return srpt_find_and_release_channel(cm_id);
1724 }
1725
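     /*
      * IB CM callback: dispatches connection manager events to the
      * handlers above. REQ and RTU implement the SRP login sequence and
      * DREQ / DREP the logout sequence; REJ, REP errors and timewait exit
      * trigger channel release.
      */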
1726 static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
1727 {
1728         int ret = 0;
1729
1730         switch (event->event) {
1731         case IB_CM_REQ_RECEIVED:
1732                 ret = srpt_cm_req_recv(cm_id, &event->param.req_rcvd,
1733                                        event->private_data);
1734                 break;
1735         case IB_CM_REJ_RECEIVED:
1736                 ret = srpt_cm_rej_recv(cm_id);
1737                 break;
1738         case IB_CM_RTU_RECEIVED:
1739         case IB_CM_USER_ESTABLISHED:
1740                 ret = srpt_cm_rtu_recv(cm_id);
1741                 break;
1742         case IB_CM_DREQ_RECEIVED:
1743                 ret = srpt_cm_dreq_recv(cm_id);
1744                 break;
1745         case IB_CM_DREP_RECEIVED:
1746                 ret = srpt_cm_drep_recv(cm_id);
1747                 break;
1748         case IB_CM_TIMEWAIT_EXIT:
1749                 ret = srpt_cm_timewait_exit(cm_id);
1750                 break;
1751         case IB_CM_REP_ERROR:
1752                 ret = srpt_cm_rep_error(cm_id);
1753                 break;
1754         default:
1755                 break;
1756         }
1757
1758         return ret;
1759 }
1760
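     /*
      * Map the scatter/gather list of 'scmnd' onto the ib_sge arrays of
      * the rdma_iu structures of 'ioctx' in two passes: the first pass
      * computes how many ib_sge entries each RDMA work request needs and
      * allocates the ib_sge arrays; the second pass fills in the DMA
      * addresses and lengths.
      */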
1761 static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1762                                  struct srpt_ioctx *ioctx,
1763                                  struct scst_cmd *scmnd)
1764 {
1765         struct scatterlist *scat;
1766         scst_data_direction dir;
1767         struct rdma_iu *riu;
1768         struct srp_direct_buf *db;
1769         dma_addr_t dma_addr;
1770         struct ib_sge *sge;
1771         u64 raddr;
1772         u32 rsize;
1773         u32 tsize;
1774         u32 dma_len;
1775         int count, nrdma;
1776         int i, j, k;
1777
1778         scat = scst_cmd_get_sg(scmnd);
1779         dir = scst_cmd_get_data_direction(scmnd);
1780         count = dma_map_sg(ch->sport->sdev->device->dma_device, scat,
1781                            scst_cmd_get_sg_cnt(scmnd),
1782                            scst_to_tgt_dma_dir(dir));
1783         if (unlikely(!count))
1784                 return -EBUSY;
1785
1786         if (ioctx->rdma_ius && ioctx->n_rdma_ius)
1787                 nrdma = ioctx->n_rdma_ius;
1788         else {
1789                 nrdma = count / SRPT_DEF_SG_PER_WQE + ioctx->n_rbuf;
1790
1791                 ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu,
1792                                           scst_cmd_atomic(scmnd)
1793                                           ? GFP_ATOMIC : GFP_KERNEL);
1794                 if (!ioctx->rdma_ius) {
1795                         dma_unmap_sg(ch->sport->sdev->device->dma_device,
1796                                      scat, scst_cmd_get_sg_cnt(scmnd),
1797                                      scst_to_tgt_dma_dir(dir));
1798                         return -ENOMEM;
1799                 }
1800
1801                 ioctx->n_rdma_ius = nrdma;
1802         }
1803
1804         db = ioctx->rbufs;
1805         tsize = (dir == SCST_DATA_READ) ?
1806                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
1807         dma_len = sg_dma_len(&scat[0]);
1808         riu = ioctx->rdma_ius;
1809
1810         /*
1811          * For each remote descriptor, calculate the number of ib_sge
1812          * entries required. If a remote descriptor needs no more than
1813          * SRPT_DEF_SG_PER_WQE ib_sge entries, a single rdma_iu (one RDMA
1814          * work request) suffices; otherwise, extra rdma_iu entries are
1815          * allocated to carry the surplus ib_sge entries in additional
1816          * RDMA work requests.
1817          */
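             /*
              * A worked example (illustrative numbers only): if
              * SRPT_DEF_SG_PER_WQE is 16 and a single remote descriptor
              * spans 20 mapped SG entries, the first 16 ib_sge entries fill
              * one rdma_iu and the remaining 4 spill over into a second
              * rdma_iu, i.e. the transfer is split over two RDMA work
              * requests that address consecutive parts of the same remote
              * buffer.
              */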
1818         for (i = 0, j = 0;
1819              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
1820                 rsize = be32_to_cpu(db->len);
1821                 raddr = be64_to_cpu(db->va);
1822                 riu->raddr = raddr;
1823                 riu->rkey = be32_to_cpu(db->key);
1824                 riu->sge_cnt = 0;
1825
1826                 /* Calculate how many ib_sge entries this remote buffer requires. */
1827                 while (rsize > 0 && tsize > 0) {
1828
1829                         if (rsize >= dma_len) {
1830                                 tsize -= dma_len;
1831                                 rsize -= dma_len;
1832                                 raddr += dma_len;
1833
1834                                 if (tsize > 0) {
1835                                         ++j;
1836                                         if (j < count)
1837                                                 dma_len = sg_dma_len(&scat[j]);
1838                                 }
1839                         } else {
1840                                 tsize -= rsize;
1841                                 dma_len -= rsize;
1842                                 rsize = 0;
1843                         }
1844
1845                         ++riu->sge_cnt;
1846
1847                         if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) {
1848                                 riu->sge =
1849                                     kmalloc(riu->sge_cnt * sizeof *riu->sge,
1850                                             scst_cmd_atomic(scmnd)
1851                                             ? GFP_ATOMIC : GFP_KERNEL);
1852                                 if (!riu->sge)
1853                                         goto free_mem;
1854
1855                                 ++ioctx->n_rdma;
1856                                 ++riu;
1857                                 riu->sge_cnt = 0;
1858                                 riu->raddr = raddr;
1859                                 riu->rkey = be32_to_cpu(db->key);
1860                         }
1861                 }
1862
1863                 riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge,
1864                                    scst_cmd_atomic(scmnd)
1865                                    ? GFP_ATOMIC : GFP_KERNEL);
1866
1867                 if (!riu->sge)
1868                         goto free_mem;
1869
1870                 ++ioctx->n_rdma;
1871         }
1872
1873         db = ioctx->rbufs;
1874         scat = scst_cmd_get_sg(scmnd);
1875         tsize = (dir == SCST_DATA_READ) ?
1876                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
1877         riu = ioctx->rdma_ius;
1878         dma_len = sg_dma_len(&scat[0]);
1879         dma_addr = sg_dma_address(&scat[0]);
1880
1881         /* The second pass maps the DMA-mapped SG addresses onto the rdma_iu ib_sge entries. */
1882         for (i = 0, j = 0;
1883              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
1884                 rsize = be32_to_cpu(db->len);
1885                 sge = riu->sge;
1886                 k = 0;
1887
1888                 while (rsize > 0 && tsize > 0) {
1889                         sge->addr = dma_addr;
1890                         sge->lkey = ch->sport->sdev->mr->lkey;
1891
1892                         if (rsize >= dma_len) {
1893                                 sge->length =
1894                                         (tsize < dma_len) ? tsize : dma_len;
1895                                 tsize -= dma_len;
1896                                 rsize -= dma_len;
1897
1898                                 if (tsize > 0) {
1899                                         ++j;
1900                                         if (j < count) {
1901                                                 dma_len = sg_dma_len(&scat[j]);
1902                                                 dma_addr =
1903                                                     sg_dma_address(&scat[j]);
1904                                         }
1905                                 }
1906                         } else {
1907                                 sge->length = (tsize < rsize) ? tsize : rsize;
1908                                 tsize -= rsize;
1909                                 dma_len -= rsize;
1910                                 dma_addr += rsize;
1911                                 rsize = 0;
1912                         }
1913
1914                         ++k;
1915                         if (k == riu->sge_cnt && rsize > 0) {
1916                                 ++riu;
1917                                 sge = riu->sge;
1918                                 k = 0;
1919                         } else if (rsize > 0)
1920                                 ++sge;
1921                 }
1922         }
1923
1924         return 0;
1925
1926 free_mem:
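             /*
              * Free the ib_sge arrays allocated so far; the pre-decrement
              * ensures that entry zero is freed and that no element past
              * the last allocated one is touched.
              */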
1927         while (ioctx->n_rdma)
1928                 kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge);
1929
1930         kfree(ioctx->rdma_ius);
             ioctx->rdma_ius = NULL;
             ioctx->n_rdma_ius = 0;
1931
1932         dma_unmap_sg(ch->sport->sdev->device->dma_device,
1933                      scat, scst_cmd_get_sg_cnt(scmnd),
1934                      scst_to_tgt_dma_dir(dir));
1935
1936         return -ENOMEM;
1937 }
1938
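     /*
      * Post one RDMA work request per rdma_iu prepared by
      * srpt_map_sg_to_ib_sge(): RDMA writes for data-in commands
      * (SCST_DATA_READ) and RDMA reads for data-out commands
      * (SCST_DATA_WRITE).
      */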
1939 static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
1940                               scst_data_direction dir)
1941 {
1942         struct ib_send_wr wr;
1943         struct ib_send_wr *bad_wr;
1944         struct rdma_iu *riu;
1945         int i;
1946         int ret = 0;
1947
1948         riu = ioctx->rdma_ius;
1949         memset(&wr, 0, sizeof wr);
1950
1951         for (i = 0; i < ioctx->n_rdma; ++i, ++riu) {
1952                 wr.opcode = (dir == SCST_DATA_READ) ?
1953                     IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
1954                 wr.next = NULL;
1955                 wr.wr_id = ioctx->index;
1956                 wr.wr.rdma.remote_addr = riu->raddr;
1957                 wr.wr.rdma.rkey = riu->rkey;
1958                 wr.num_sge = riu->sge_cnt;
1959                 wr.sg_list = riu->sge;
1960
1961                 /* Signal a completion only for the last WR, and only for RDMA reads. */
1962                 if (i == (ioctx->n_rdma - 1) && dir == SCST_DATA_WRITE)
1963                         wr.send_flags = IB_SEND_SIGNALED;
1964
1965                 ret = ib_post_send(ch->qp, &wr, &bad_wr);
1966                 if (ret)
1967                         break;
1968         }
1969
1970         return ret;
1971 }
1972
1973 /*
1974  * Start data transfer, i.e. post the RDMA work requests. Must not block.
1975  */
1976 static int srpt_xfer_data(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
1977                           struct scst_cmd *scmnd)
1978 {
1979         int ret;
1980
1981         ret = srpt_map_sg_to_ib_sge(ch, ioctx, scmnd);
1982         if (ret) {
1983                 printk(KERN_ERR PFX "%s[%d] ret=%d\n", __func__, __LINE__, ret);
1984                 ret = SCST_TGT_RES_QUEUE_FULL;
1985                 goto out;
1986         }
1987
1988         ret = srpt_perform_rdmas(ch, ioctx, scst_cmd_get_data_direction(scmnd));
1989         if (ret) {
1990                 printk(KERN_ERR PFX "%s[%d] ret=%d\n", __func__, __LINE__, ret);
1991                 if (ret == -EAGAIN || ret == -ENOMEM)
1992                         ret = SCST_TGT_RES_QUEUE_FULL;
1993                 else
1994                         ret = SCST_TGT_RES_FATAL_ERROR;
1995                 goto out;
1996         }
1997
1998         ret = SCST_TGT_RES_SUCCESS;
1999
2000 out:
2001         return ret;
2002 }
2003
2004 /*
2005  * Called by the SCST core to inform ib_srpt that data reception should start.
2006  * Must not block.
2007  */
2008 static int srpt_rdy_to_xfer(struct scst_cmd *scmnd)
2009 {
2010         struct srpt_rdma_ch *ch;
2011         struct srpt_ioctx *ioctx;
2012
2013         ioctx = scst_cmd_get_tgt_priv(scmnd);
2014         BUG_ON(!ioctx);
2015
2016         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2017         BUG_ON(!ch);
2018
2019         if (ch->state == RDMA_CHANNEL_DISCONNECTING)
2020                 return SCST_TGT_RES_FATAL_ERROR;
2021         else if (ch->state == RDMA_CHANNEL_CONNECTING)
2022                 return SCST_TGT_RES_QUEUE_FULL;
2023
2024         return srpt_xfer_data(ch, ioctx, scmnd);
2025 }
2026
2027 /*
2028  * Called by the SCST core. Transmits the response buffer and status held in
2029  * 'scmnd'. Must not block.
2030  */
2031 static int srpt_xmit_response(struct scst_cmd *scmnd)
2032 {
2033         struct srpt_rdma_ch *ch;
2034         struct srpt_ioctx *ioctx;
2035         struct srp_rsp *srp_rsp;
2036         u64 tag;
2037         int ret = SCST_TGT_RES_SUCCESS;
2038         int dir;
2039         int status;
2040
2041         ioctx = scst_cmd_get_tgt_priv(scmnd);
2042         BUG_ON(!ioctx);
2043
2044         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2045         BUG_ON(!ch);
2046
2047         tag = scst_cmd_get_tag(scmnd);
2048
2049         if (ch->state != RDMA_CHANNEL_LIVE) {
2050                 printk(KERN_ERR PFX
2051                        "%s: tag= %lld channel in bad state %d\n",
2052                        __func__, (unsigned long long)tag, ch->state);
2053
2054                 if (ch->state == RDMA_CHANNEL_DISCONNECTING)
2055                         ret = SCST_TGT_RES_FATAL_ERROR;
2056                 else if (ch->state == RDMA_CHANNEL_CONNECTING)
2057                         ret = SCST_TGT_RES_QUEUE_FULL;
2058
2059                 if (unlikely(scst_cmd_aborted(scmnd)))
2060                         goto out_aborted;
2061
2062                 goto out;
2063         }
2064
2065         dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
2066                                 MAX_MESSAGE_SIZE, DMA_TO_DEVICE);
2067
2068         srp_rsp = ioctx->buf;
2069
2070         if (unlikely(scst_cmd_aborted(scmnd))) {
2071                 printk(KERN_ERR PFX
2072                "%s: tag= %lld has already been aborted\n",
2073                        __func__, (unsigned long long)tag);
2074                 goto out_aborted;
2075         }
2076
2077         dir = scst_cmd_get_data_direction(scmnd);
2078         status = scst_cmd_get_status(scmnd) & 0xff;
2079
2080         srpt_build_cmd_rsp(ch, ioctx, NO_SENSE, NO_ADD_SENSE, tag);
2081
2082         if (SCST_SENSE_VALID(scst_cmd_get_sense_buffer(scmnd))) {
2083                 srp_rsp->sense_data_len = scst_cmd_get_sense_buffer_len(scmnd);
2084                 if (srp_rsp->sense_data_len >
2085                     (MAX_MESSAGE_SIZE - sizeof *srp_rsp))
2086                         srp_rsp->sense_data_len =
2087                             MAX_MESSAGE_SIZE - sizeof *srp_rsp;
2088
2089                 memcpy((u8 *) (srp_rsp + 1), scst_cmd_get_sense_buffer(scmnd),
2090                        srp_rsp->sense_data_len);
2091
2092                 srp_rsp->sense_data_len = cpu_to_be32(srp_rsp->sense_data_len);
2093                 srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
2094
2095                 if (!status)
2096                         status = SAM_STAT_CHECK_CONDITION;
2097         }
2098
2099         srp_rsp->status = status;
2100
2101         /* transfer read data if any */
2102         if (dir == SCST_DATA_READ && scst_cmd_get_resp_data_len(scmnd)) {
2103                 ret = srpt_xfer_data(ch, ioctx, scmnd);
2104                 if (ret != SCST_TGT_RES_SUCCESS) {
2105                         printk(KERN_ERR PFX
2106                                "%s: tag= %lld xfer_data failed\n",
2107                                __func__, (unsigned long long)tag);
2108                         goto out;
2109                 }
2110         }
2111
2112         if (srpt_post_send(ch, ioctx,
2113                            sizeof *srp_rsp +
2114                            be32_to_cpu(srp_rsp->sense_data_len))) {
2115                 printk(KERN_ERR PFX "%s: ch->state= %d tag= %lld\n",
2116                        __func__, ch->state,
2117                        (unsigned long long)tag);
2118                 ret = SCST_TGT_RES_FATAL_ERROR;
2119         }
2120
2121 out:
2122         return ret;
2123
2124 out_aborted:
2125         ret = SCST_TGT_RES_SUCCESS;
2126         scst_set_delivery_status(scmnd, SCST_CMD_DELIVERY_ABORTED);
2127         scst_tgt_cmd_done(scmnd, SCST_CONTEXT_SAME);
2128         goto out;
2129 }
2130
2131 /*
2132  * Called by the SCST core to inform ib_srpt that a received task management
2133  * function has been completed. Must not block.
2134  */
2135 static void srpt_tsk_mgmt_done(struct scst_mgmt_cmd *mcmnd)
2136 {
2137         struct srpt_rdma_ch *ch;
2138         struct srpt_mgmt_ioctx *mgmt_ioctx;
2139         struct srpt_ioctx *ioctx;
2140
2141         mgmt_ioctx = scst_mgmt_cmd_get_tgt_priv(mcmnd);
2142         BUG_ON(!mgmt_ioctx);
2143
2144         ch = mgmt_ioctx->ch;
2145         BUG_ON(!ch);
2146
2147         ioctx = mgmt_ioctx->ioctx;
2148         BUG_ON(!ioctx);
2149
2150         printk(KERN_WARNING PFX
2151                "%s: tsk_mgmt_done for tag= %lld status=%d\n",
2152                __func__, (unsigned long long)mgmt_ioctx->tag,
2153                scst_mgmt_cmd_get_status(mcmnd));
2154
2155         srpt_build_tskmgmt_rsp(ch, ioctx,
2156                                (scst_mgmt_cmd_get_status(mcmnd) ==
2157                                 SCST_MGMT_STATUS_SUCCESS) ?
2158                                SRP_TSK_MGMT_SUCCESS : SRP_TSK_MGMT_FAILED,
2159                                mgmt_ioctx->tag);
2160         srpt_post_send(ch, ioctx, sizeof(struct srp_rsp) + 4);
2161
2162         scst_mgmt_cmd_set_tgt_priv(mcmnd, NULL);
2163
2164         kfree(mgmt_ioctx);
2165 }
2166
2167 /*
2168  * Called by the SCST core to inform ib_srpt that the command 'scmnd' is about
2169  * to be freed. May be called in IRQ context.
2170  */
2171 static void srpt_on_free_cmd(struct scst_cmd *scmnd)
2172 {
2173         struct srpt_rdma_ch *ch;
2174         struct srpt_ioctx *ioctx;
2175
2176         ioctx = scst_cmd_get_tgt_priv(scmnd);
2177         BUG_ON(!ioctx);
2178
2179         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2180         BUG_ON(!ch);
2181
2182         spin_lock_irq(&ch->spinlock);
2183         list_del(&ioctx->scmnd_list);
2184         ch->active_scmnd_cnt--;
2185         spin_unlock_irq(&ch->spinlock);
2186
2187         srpt_reset_ioctx(ch, ioctx);
2188         scst_cmd_set_tgt_priv(scmnd, NULL);
2189 }
2190
2191 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2192 /* A vanilla 2.6.19 or older kernel without backported OFED kernel headers. */
2193 static void srpt_refresh_port_work(void *ctx)
2194 #else
2195 static void srpt_refresh_port_work(struct work_struct *work)
2196 #endif
2197 {
2198 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2199         struct srpt_port *sport = (struct srpt_port *)ctx;
2200 #else
2201         struct srpt_port *sport = container_of(work, struct srpt_port, work);
2202 #endif
2203
2204         srpt_refresh_port(sport);
2205 }
2206
2207 /*
2208  * Called by the SCST core to detect target adapters. Returns the number of
2209  * detected target adapters.
2210  */
2211 static int srpt_detect(struct scst_tgt_template *tp)
2212 {
2213         struct srpt_device *sdev;
2214         int count = 0;
2215
2216         TRACE_ENTRY();
2217
2218         list_for_each_entry(sdev, &srpt_devices, list)
2219                 ++count;
2220
2221         TRACE_EXIT();
2222
2223         return count;
2224 }
2225
2226 /*
2227  * Callback function called by the SCST core from scst_unregister() to free up
2228  * the resources associated with device scst_tgt.
2229  */
2230 static int srpt_release(struct scst_tgt *scst_tgt)
2231 {
2232         struct srpt_device *sdev = scst_tgt_get_tgt_priv(scst_tgt);
2233         struct srpt_rdma_ch *ch, *tmp_ch;
2234
2235         TRACE_ENTRY();
2236
2237         BUG_ON(!scst_tgt);
2238 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
2239         WARN_ON(!sdev);
2240         if (!sdev)
2241                 return -ENODEV;
2242 #else
2243         if (WARN_ON(!sdev))
2244                 return -ENODEV;
2245 #endif
2246
2247         srpt_unregister_procfs_entry(scst_tgt->tgtt);
2248
2249         list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list)
2250             srpt_release_channel(ch, 1);
2251
2252         srpt_unregister_mad_agent(sdev);
2253
2254         scst_tgt_set_tgt_priv(scst_tgt, NULL);
2255
2256         TRACE_EXIT();
2257
2258         return 0;
2259 }
2260
2261 /*
2262  * Entry point for ib_srpt's kernel thread. This kernel thread is only created
2263  * when the module parameter 'thread' is not zero (the default is zero).
2264  * This thread processes the ioctx list srpt_thread.thread_ioctx_list.
2265  *
2266  * @pre thread != 0
2267  */
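     /*
      * Example (assuming the module has been installed; the exact command
      * depends on the local setup): "modprobe ib_srpt thread=1" loads the
      * driver with completion processing done in this kernel thread
      * instead of in interrupt context.
      */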
2268 static int srpt_ioctx_thread(void *arg)
2269 {
2270         struct srpt_ioctx *ioctx;
2271
2272         /* Hibernation / freezing of the SRPT kernel thread is not supported. */
2273         current->flags |= PF_NOFREEZE;
2274
2275         spin_lock_irq(&srpt_thread.thread_lock);
2276         while (!kthread_should_stop()) {
2277                 wait_queue_t wait;
2278                 init_waitqueue_entry(&wait, current);
2279
2280                 if (!srpt_test_ioctx_list()) {
2281                         add_wait_queue_exclusive(&ioctx_list_waitQ, &wait);
2282
2283                         for (;;) {
2284                                 set_current_state(TASK_INTERRUPTIBLE);
2285                                 if (srpt_test_ioctx_list())
2286                                         break;
2287                                 spin_unlock_irq(&srpt_thread.thread_lock);
2288                                 schedule();
2289                                 spin_lock_irq(&srpt_thread.thread_lock);
2290                         }
2291                         set_current_state(TASK_RUNNING);
2292                         remove_wait_queue(&ioctx_list_waitQ, &wait);
2293                 }
2294
2295                 while (!list_empty(&srpt_thread.thread_ioctx_list)) {
2296                         ioctx = list_entry(srpt_thread.thread_ioctx_list.next,
2297                                            struct srpt_ioctx, comp_list);
2298
2299                         list_del(&ioctx->comp_list);
2300
2301                         spin_unlock_irq(&srpt_thread.thread_lock);
2302                         switch (ioctx->op) {
2303                         case IB_WC_SEND:
2304                                 srpt_handle_send_comp(ioctx->ch, ioctx,
2305                                         SCST_CONTEXT_DIRECT);
2306                                 break;
2307                         case IB_WC_RDMA_WRITE:
2308                         case IB_WC_RDMA_READ:
2309                                 srpt_handle_rdma_comp(ioctx->ch, ioctx);
2310                                 break;
2311                         case IB_WC_RECV:
2312                                 srpt_handle_new_iu(ioctx->ch, ioctx);
2313                                 break;
2314                         default:
2315                                 break;
2316                         }
2317                         spin_lock_irq(&srpt_thread.thread_lock);
2318                 }
2319         }
2320         spin_unlock_irq(&srpt_thread.thread_lock);
2321
2322         return 0;
2323 }
2324
2325 /* SCST target template for the SRP target implementation. */
2326 static struct scst_tgt_template srpt_template = {
2327         .name = DRV_NAME,
2328         .sg_tablesize = SRPT_DEF_SG_TABLESIZE,
2329         .xmit_response_atomic = 1,
2330         .rdy_to_xfer_atomic = 1,
2331         .no_proc_entry = 0,
2332         .detect = srpt_detect,
2333         .release = srpt_release,
2334         .xmit_response = srpt_xmit_response,
2335         .rdy_to_xfer = srpt_rdy_to_xfer,
2336         .on_free_cmd = srpt_on_free_cmd,
2337         .task_mgmt_fn_done = srpt_tsk_mgmt_done
2338 };
2339
2340 /*
2341  * The callback function srpt_release_class_dev() is called whenever a
2342  * device is removed from the /sys/class/infiniband_srpt device class.
2343  */
2344 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2345 static void srpt_release_class_dev(struct class_device *class_dev)
2346 #else
2347 static void srpt_release_class_dev(struct device *dev)
2348 #endif
2349 {
2350 }
2351
2352 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
2353 static int srpt_trace_level_show(struct seq_file *seq, void *v)
2354 {
2355         return scst_proc_log_entry_read(seq, trace_flag, NULL);
2356 }
2357
2358 static ssize_t srpt_proc_trace_level_write(struct file *file,
2359         const char __user *buf, size_t length, loff_t *off)
2360 {
2361         return scst_proc_log_entry_write(file, buf, length, &trace_flag,
2362                 DEFAULT_SRPT_TRACE_FLAGS, NULL);
2363 }
2364
2365 static struct scst_proc_data srpt_log_proc_data = {
2366         SCST_DEF_RW_SEQ_OP(srpt_proc_trace_level_write)
2367         .show = srpt_trace_level_show,
2368 };
2369 #endif
2370
2371 static struct class_attribute srpt_class_attrs[] = {
2372         __ATTR_NULL,
2373 };
2374
2375 static struct class srpt_class = {
2376         .name = "infiniband_srpt",
2377 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2378         .release = srpt_release_class_dev,
2379 #else
2380         .dev_release = srpt_release_class_dev,
2381 #endif
2382         .class_attrs = srpt_class_attrs,
2383 };
2384
2385 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2386 static ssize_t show_login_info(struct class_device *class_dev, char *buf)
2387 #else
2388 static ssize_t show_login_info(struct device *dev,
2389                                struct device_attribute *attr, char *buf)
2390 #endif
2391 {
2392         struct srpt_device *sdev =
2393 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2394                 container_of(class_dev, struct srpt_device, class_dev);
2395 #else
2396                 container_of(dev, struct srpt_device, dev);
2397 #endif
2398         struct srpt_port *sport;
2399         int i;
2400         int len = 0;
2401
2402         for (i = 0; i < sdev->device->phys_port_cnt; i++) {
2403                 sport = &sdev->port[i];
2404
2405                 len += sprintf(buf + len,
2406                                "tid_ext=%016llx,ioc_guid=%016llx,pkey=ffff,"
2407                                "dgid=%04x%04x%04x%04x%04x%04x%04x%04x,"
2408                                "service_id=%016llx\n",
2409                                (unsigned long long) mellanox_ioc_guid,
2410                                (unsigned long long) mellanox_ioc_guid,
2411                                be16_to_cpu(((__be16 *) sport->gid.raw)[0]),
2412                                be16_to_cpu(((__be16 *) sport->gid.raw)[1]),
2413                                be16_to_cpu(((__be16 *) sport->gid.raw)[2]),
2414                                be16_to_cpu(((__be16 *) sport->gid.raw)[3]),
2415                                be16_to_cpu(((__be16 *) sport->gid.raw)[4]),
2416                                be16_to_cpu(((__be16 *) sport->gid.raw)[5]),
2417                                be16_to_cpu(((__be16 *) sport->gid.raw)[6]),
2418                                be16_to_cpu(((__be16 *) sport->gid.raw)[7]),
2419                                (unsigned long long) mellanox_ioc_guid);
2420         }
2421
2422         return len;
2423 }
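     /*
      * The lines generated above describe the target ports. As an
      * illustrative sketch only (the SRP initiator's add_target interface
      * expects "id_ext" rather than "tid_ext", and device and port names
      * vary), an initiator could log in with something like:
      *
      *   echo "id_ext=...,ioc_guid=...,dgid=...,pkey=ffff,service_id=..." \
      *     > /sys/class/infiniband_srp/srp-mthca0-1/add_target
      */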
2424
2425 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2426 static CLASS_DEVICE_ATTR(login_info, S_IRUGO, show_login_info, NULL);
2427 #else
2428 static DEVICE_ATTR(login_info, S_IRUGO, show_login_info, NULL);
2429 #endif
2430
2431 /*
2432  * Callback function called by the InfiniBand core when either an InfiniBand
2433  * device has been added or during the ib_register_client() call for each
2434  * registered InfiniBand device.
2435  */
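     /*
      * Resource setup order: sysfs device, PD, DMA MR, SRQ, CM ID, event
      * handler, I/O context ring, SCST target and per-port MAD agents; the
      * error path at the end of this function unwinds these in reverse
      * order.
      */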
2436 static void srpt_add_one(struct ib_device *device)
2437 {
2438         struct srpt_device *sdev;
2439         struct srpt_port *sport;
2440         struct ib_srq_init_attr srq_attr;
2441         int i;
2442
2443         TRACE_ENTRY();
2444
2445         sdev = kzalloc(sizeof *sdev, GFP_KERNEL);
2446         if (!sdev)
2447                 return;
2448
2449         sdev->device = device;
2450
2451 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2452         sdev->class_dev.class = &srpt_class;
2453         sdev->class_dev.dev = device->dma_device;
2454         snprintf(sdev->class_dev.class_id, BUS_ID_SIZE,
2455                  "srpt-%s", device->name);
2456 #else
2457         sdev->dev.class = &srpt_class;
2458         sdev->dev.parent = device->dma_device;
2459 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30)
2460         snprintf(sdev->dev.bus_id, BUS_ID_SIZE, "srpt-%s", device->name);
2461 #else
2462         snprintf(sdev->init_name, sizeof(sdev->init_name),
2463                  "srpt-%s", device->name);
2464         sdev->dev.init_name = sdev->init_name;
2465 #endif
2466 #endif
2467
2468 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2469         if (class_device_register(&sdev->class_dev))
2470                 goto free_dev;
2471         if (class_device_create_file(&sdev->class_dev,
2472                                      &class_device_attr_login_info))
2473                 goto err_dev;
2474 #else
2475         if (device_register(&sdev->dev))
2476                 goto free_dev;
2477         if (device_create_file(&sdev->dev, &dev_attr_login_info))
2478                 goto err_dev;
2479 #endif
2480
2481         if (ib_query_device(device, &sdev->dev_attr))
2482                 goto err_dev;
2483
2484         sdev->pd = ib_alloc_pd(device);
2485         if (IS_ERR(sdev->pd))
2486                 goto err_dev;
2487
2488         sdev->mr = ib_get_dma_mr(sdev->pd, IB_ACCESS_LOCAL_WRITE);
2489         if (IS_ERR(sdev->mr))
2490                 goto err_pd;
2491
2492         srq_attr.event_handler = srpt_srq_event;
2493         srq_attr.srq_context = (void *)sdev;
2494         srq_attr.attr.max_wr = min(SRPT_SRQ_SIZE, sdev->dev_attr.max_srq_wr);
2495         srq_attr.attr.max_sge = 1;
2496         srq_attr.attr.srq_limit = 0;
2497
2498         sdev->srq = ib_create_srq(sdev->pd, &srq_attr);
2499         if (IS_ERR(sdev->srq))
2500                 goto err_mr;
2501
2502         printk(KERN_DEBUG PFX "%s: create SRQ #wr= %d max_allow=%d dev= %s\n",
2503                __func__, srq_attr.attr.max_wr,
2504               sdev->dev_attr.max_srq_wr, device->name);
2505
2506         if (!mellanox_ioc_guid)
2507                 mellanox_ioc_guid = be64_to_cpu(device->node_guid);
2508
2509         sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev);
2510         if (IS_ERR(sdev->cm_id))
2511                 goto err_srq;
2512
2513         /* print out target login information */
2514         printk(KERN_DEBUG PFX "Target login info: id_ext=%016llx,"
2515                 "ioc_guid=%016llx,pkey=ffff,service_id=%016llx\n",
2516                 (unsigned long long) mellanox_ioc_guid,
2517                 (unsigned long long) mellanox_ioc_guid,
2518                 (unsigned long long) mellanox_ioc_guid);
2519
2520         /*
2521          * We do not have a consistent service_id (i.e. the id_ext of the
2522          * target_id) to identify this target. We currently use the GUID of
2523          * the first HCA in the system as the service_id, so the target_id
2524          * will change if this HCA fails and is replaced by a different one.
2525          */
2526         if (ib_cm_listen(sdev->cm_id, cpu_to_be64(mellanox_ioc_guid), 0, NULL))
2527                 goto err_cm;
2528
2529         INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device,
2530                               srpt_event_handler);
2531         if (ib_register_event_handler(&sdev->event_handler))
2532                 goto err_cm;
2533
2534         if (srpt_alloc_ioctx_ring(sdev))
2535                 goto err_event;
2536
2537         INIT_LIST_HEAD(&sdev->rch_list);
2538         spin_lock_init(&sdev->spinlock);
2539
2540         for (i = 0; i < SRPT_SRQ_SIZE; ++i)
2541                 srpt_post_recv(sdev, sdev->ioctx_ring[i]);
2542
2543         list_add_tail(&sdev->list, &srpt_devices);
2544
2545         ib_set_client_data(device, &srpt_client, sdev);
2546
2547         sdev->scst_tgt = scst_register(&srpt_template, NULL);
2548         if (!sdev->scst_tgt) {
2549                 printk(KERN_ERR PFX "SCST registration failed for %s.\n",
2550                         sdev->device->name);
2551                 goto err_ring;
2552         }
2553
2554         scst_tgt_set_tgt_priv(sdev->scst_tgt, sdev);
2555
2556         for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
2557                 sport = &sdev->port[i - 1];
2558                 sport->sdev = sdev;
2559                 sport->port = i;
2560 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2561                 /*
2562                  * A vanilla 2.6.19 or older kernel without backported OFED
2563                  * kernel headers.
2564                  */
2565                 INIT_WORK(&sport->work, srpt_refresh_port_work, sport);
2566 #else
2567                 INIT_WORK(&sport->work, srpt_refresh_port_work);
2568 #endif
2569                 if (srpt_refresh_port(sport)) {
2570                         printk(KERN_ERR PFX "MAD registration failed"
2571                                " for %s-%d.\n", sdev->device->name, i);
2572                         goto err_refresh_port;
2573                 }
2574         }
2575
2576         TRACE_EXIT();
2577
2578         return;
2579
2580 err_refresh_port:
2581         scst_unregister(sdev->scst_tgt);
2582 err_ring:
2583         ib_set_client_data(device, &srpt_client, NULL);
2584         list_del(&sdev->list);
2585         srpt_free_ioctx_ring(sdev);
2586 err_event:
2587         ib_unregister_event_handler(&sdev->event_handler);
2588 err_cm:
2589         ib_destroy_cm_id(sdev->cm_id);
2590 err_srq:
2591         ib_destroy_srq(sdev->srq);
2592 err_mr:
2593         ib_dereg_mr(sdev->mr);
2594 err_pd:
2595         ib_dealloc_pd(sdev->pd);
2596 err_dev:
2597 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2598         class_device_unregister(&sdev->class_dev);
2599 #else
2600         device_unregister(&sdev->dev);
2601 #endif
2602 free_dev:
2603         kfree(sdev);
2604
2605         TRACE_EXIT();
2606 }
2607
2608 /*
2609  * Callback function called by the InfiniBand core when either an InfiniBand
2610  * device has been removed or during the ib_unregister_client() call for each
2611  * registered InfiniBand device.
2612  */
2613 static void srpt_remove_one(struct ib_device *device)
2614 {
2615         int i;
2616         struct srpt_device *sdev;
2617
2618         TRACE_ENTRY();
2619
2620         sdev = ib_get_client_data(device, &srpt_client);
2621 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
2622         WARN_ON(!sdev);
2623         if (!sdev)
2624                 return;
2625 #else
2626         if (WARN_ON(!sdev))
2627                 return;
2628 #endif
2629
2630         /*
2631          * Cancel the work if it is queued. Wait until srpt_refresh_port_work()
2632          * finished if it is running.
2633          */
2634         for (i = 0; i < sdev->device->phys_port_cnt; i++)
2635                 cancel_work_sync(&sdev->port[i].work);
2636
2637         scst_unregister(sdev->scst_tgt);
2638         sdev->scst_tgt = NULL;
2639
2640         ib_unregister_event_handler(&sdev->event_handler);
2641         ib_destroy_cm_id(sdev->cm_id);
2642         ib_destroy_srq(sdev->srq);
2643         ib_dereg_mr(sdev->mr);
2644         ib_dealloc_pd(sdev->pd);
2645 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2646         class_device_unregister(&sdev->class_dev);
2647 #else
2648         device_unregister(&sdev->dev);
2649 #endif
2650
2651         srpt_free_ioctx_ring(sdev);
2652         list_del(&sdev->list);
2653         kfree(sdev);
2654
2655         TRACE_EXIT();
2656 }
2657
2658 /**
2659  * Create procfs entries for srpt. Currently the only procfs entry created
2660  * by this function is the "trace_level" entry.
2661  */
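     /*
      * Usage sketch (only when CONFIG_SCST_DEBUG or CONFIG_SCST_TRACING is
      * enabled): reading /proc/scsi_tgt/ib_srpt/trace_level shows the
      * active trace flags and writing to the same file changes them.
      */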
2662 static int srpt_register_procfs_entry(struct scst_tgt_template *tgt)
2663 {
2664         int res = 0;
2665 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
2666         struct proc_dir_entry *p, *root;
2667
2668         root = scst_proc_get_tgt_root(tgt);
2669         WARN_ON(!root);
2670         if (root) {
2671                 /*
2672                  * Fill in the scst_proc_data::data pointer, which is used in
2673                  * a printk(KERN_INFO ...) statement in
2674                  * scst_proc_log_entry_write() in scst_proc.c.
2675                  */
2676                 srpt_log_proc_data.data = (char *)tgt->name;
2677                 p = scst_create_proc_entry(root, SRPT_PROC_TRACE_LEVEL_NAME,
2678                                            &srpt_log_proc_data);
2679                 if (!p)
2680                         res = -ENOMEM;
2681         } else
2682                 res = -ENOMEM;
2683
2684 #endif
2685         return res;
2686 }
2687
2688 static void srpt_unregister_procfs_entry(struct scst_tgt_template *tgt)
2689 {
2690 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
2691         struct proc_dir_entry *root;
2692
2693         root = scst_proc_get_tgt_root(tgt);
2694         WARN_ON(!root);
2695         if (root)
2696                 remove_proc_entry(SRPT_PROC_TRACE_LEVEL_NAME, root);
2697 #endif
2698 }
2699
2700 /*
2701  * Module initialization.
2702  *
2703  * Note: since ib_register_client() registers callback functions, and since at
2704  * least one of these callback functions (srpt_add_one()) calls SCST functions,
2705  * the SCST target template must be registered before ib_register_client() is
2706  * called.
2707  */
2708 static int __init srpt_init_module(void)
2709 {
2710         int ret;
2711
2712         INIT_LIST_HEAD(&srpt_devices);
2713
2714         ret = class_register(&srpt_class);
2715         if (ret) {
2716                 printk(KERN_ERR PFX "couldn't register the infiniband_srpt class\n");
2717                 goto out;
2718         }
2719
2720         ret = scst_register_target_template(&srpt_template);
2721         if (ret < 0) {
2722                 printk(KERN_ERR PFX "couldn't register with scst\n");
2723                 ret = -ENODEV;
2724                 goto out_unregister_class;
2725         }
2726
2727         ret = srpt_register_procfs_entry(&srpt_template);
2728         if (ret) {
2729                 printk(KERN_ERR PFX "couldn't register procfs entry\n");
2730                 goto out_unregister_target;
2731         }
2732
2733         ret = ib_register_client(&srpt_client);
2734         if (ret) {
2735                 printk(KERN_ERR PFX "couldn't register IB client\n");
2736                 goto out_unregister_target;
2737         }
2738
2739         if (thread) {
2740                 spin_lock_init(&srpt_thread.thread_lock);
2741                 INIT_LIST_HEAD(&srpt_thread.thread_ioctx_list);
2742                 srpt_thread.thread = kthread_run(srpt_ioctx_thread,
2743                                                  NULL, "srpt_thread");
2744                 if (IS_ERR(srpt_thread.thread)) {
2745                         srpt_thread.thread = NULL;
2746                         thread = 0;
2747                 }
2748         }
2749
2750         return 0;
2751
2752 out_unregister_target:
2753         /*
2754          * Note: the procfs entry is unregistered in srpt_release(), which is
2755          * called by scst_unregister_target_template().
2756          */
2757         scst_unregister_target_template(&srpt_template);
2758 out_unregister_class:
2759         class_unregister(&srpt_class);
2760 out:
2761         return ret;
2762 }
2763
2764 static void __exit srpt_cleanup_module(void)
2765 {
2766         TRACE_ENTRY();
2767
2768         if (srpt_thread.thread)
2769                 kthread_stop(srpt_thread.thread);
2770         ib_unregister_client(&srpt_client);
2771         scst_unregister_target_template(&srpt_template);
2772         class_unregister(&srpt_class);
2773
2774         TRACE_EXIT();
2775 }
2776
2777 module_init(srpt_init_module);
2778 module_exit(srpt_cleanup_module);