/*
 * Copyright (c) 2006 - 2009 Mellanox Technology Inc.  All rights reserved.
 * Copyright (C) 2008 Vladislav Bolkhovitin <vst@vlnb.net>
 * Copyright (C) 2008 - 2009 Bart Van Assche <bart.vanassche@gmail.com>
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/kthread.h>
#include <asm/atomic.h>
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#endif
#include "ib_srpt.h"
#include "scst_debug.h"

/* Name of this kernel module. */
#define DRV_NAME                "ib_srpt"
/* Prefix for printk() kernel messages. */
#define PFX                     DRV_NAME ": "
#define DRV_VERSION             "1.0.1"
#define DRV_RELDATE             "July 10, 2008"
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
/* Flags to be used in SCST debug tracing statements. */
#define DEFAULT_SRPT_TRACE_FLAGS (TRACE_OUT_OF_MEM | TRACE_MINOR \
                                  | TRACE_MGMT | TRACE_SPECIAL)
/* Name of the entry that will be created under /proc/scsi_tgt/ib_srpt. */
#define SRPT_PROC_TRACE_LEVEL_NAME      "trace_level"
#endif

#define MELLANOX_SRPT_ID_STRING "Mellanox OFED SRP target"

MODULE_AUTHOR("Vu Pham");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol target "
                   "v" DRV_VERSION " (" DRV_RELDATE ")");
MODULE_LICENSE("Dual BSD/GPL");

struct srpt_thread {
        /* Protects thread_ioctx_list. */
        spinlock_t thread_lock;
        /* I/O contexts to be processed by the kernel thread. */
        struct list_head thread_ioctx_list;
        /* SRPT kernel thread. */
        struct task_struct *thread;
};

/*
 * Global Variables
 */

static u64 mellanox_ioc_guid;
/* List of srpt_device structures. */
static struct list_head srpt_devices;
static int thread;
static struct srpt_thread srpt_thread;
static DECLARE_WAIT_QUEUE_HEAD(ioctx_list_waitQ);
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
static unsigned long trace_flag = DEFAULT_SRPT_TRACE_FLAGS;
module_param(trace_flag, long, 0644);
MODULE_PARM_DESC(trace_flag,
                 "Trace flags for the ib_srpt kernel module.");
#endif

module_param(thread, int, 0444);
MODULE_PARM_DESC(thread,
                 "Execute I/O contexts in kernel thread context instead of "
                 "soft IRQ context where possible. Default is 0 (soft IRQ).");

static void srpt_add_one(struct ib_device *device);
static void srpt_remove_one(struct ib_device *device);
static int srpt_disconnect_channel(struct srpt_rdma_ch *ch, int dreq);
static void srpt_unregister_mad_agent(struct srpt_device *sdev);
static void srpt_unregister_procfs_entry(struct scst_tgt_template *tgt);

static struct ib_client srpt_client = {
        .name = DRV_NAME,
        .add = srpt_add_one,
        .remove = srpt_remove_one
};

/*
 * Callback function called by the InfiniBand core when an asynchronous IB
 * event occurs. This callback may occur in interrupt context. See also
 * section 11.5.2, Set Asynchronous Event Handler in the InfiniBand
 * Architecture Specification.
 */
static void srpt_event_handler(struct ib_event_handler *handler,
                               struct ib_event *event)
{
        struct srpt_device *sdev =
            ib_get_client_data(event->device, &srpt_client);
        struct srpt_port *sport;

        if (!sdev || sdev->device != event->device)
                return;

        printk(KERN_WARNING PFX "ASYNC event= %d on device= %s\n",
                event->event, sdev->device->name);

        switch (event->event) {
        case IB_EVENT_PORT_ERR:
                if (event->element.port_num <= sdev->device->phys_port_cnt) {
                        sport = &sdev->port[event->element.port_num - 1];
                        sport->lid = 0;
                        sport->sm_lid = 0;
                }
                break;
        case IB_EVENT_PORT_ACTIVE:
        case IB_EVENT_LID_CHANGE:
        case IB_EVENT_PKEY_CHANGE:
        case IB_EVENT_SM_CHANGE:
        case IB_EVENT_CLIENT_REREGISTER:
                /*
                 * Refresh port data asynchronously. Note: it is safe to call
                 * schedule_work() even if &sport->work is already on the
                 * global workqueue because schedule_work() tests for the
                 * work_pending() condition before adding &sport->work to the
                 * global work queue.
                 */
                if (event->element.port_num <= sdev->device->phys_port_cnt) {
                        sport = &sdev->port[event->element.port_num - 1];
                        if (!sport->lid && !sport->sm_lid)
                                schedule_work(&sport->work);
                }
                break;
        default:
                break;
        }
}

/*
 * Callback function called by the InfiniBand core for SRQ (shared receive
 * queue) events.
 */
static void srpt_srq_event(struct ib_event *event, void *ctx)
{
        printk(KERN_WARNING PFX "SRQ event %d\n", event->event);
}

/*
 * Callback function called by the InfiniBand core for QP (queue pair) events.
 */
static void srpt_qp_event(struct ib_event *event, void *ctx)
{
        struct srpt_rdma_ch *ch = ctx;

        printk(KERN_WARNING PFX
               "QP event %d on cm_id=%p sess_name=%s state=%d\n",
               event->event, ch->cm_id, ch->sess_name, ch->state);

        switch (event->event) {
        case IB_EVENT_COMM_EST:
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20) || defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
                ib_cm_notify(ch->cm_id, event->event);
#else
                /* Vanilla 2.6.19 kernel (or before) without OFED. */
                printk(KERN_ERR PFX "ib_cm_notify() is not available on"
                       " vanilla kernels older than 2.6.20\n");
#endif
                break;
        case IB_EVENT_QP_LAST_WQE_REACHED:
                if (ch->state == RDMA_CHANNEL_LIVE) {
                        printk(KERN_WARNING PFX
                               "Schedule CM_DISCONNECT_WORK\n");
                        srpt_disconnect_channel(ch, 1);
                }
                break;
        default:
                break;
        }
}

/*
 * Helper function for filling in an InfiniBand IOUnitInfo structure. Stores
 * the lowest four bits of 'value' in element 'slot' of the array of four-bit
 * elements called c_list (controller list). The index 'slot' is one-based.
 *
 * @pre 1 <= slot && 0 <= value && value < 16
 */
static void srpt_set_ioc(u8 *c_list, u32 slot, u8 value)
{
        u16 id;
        u8 tmp;

        id = (slot - 1) / 2;
        if (slot & 0x1) {
                tmp = c_list[id] & 0xf;
                c_list[id] = (value << 4) | tmp;
        } else {
                tmp = c_list[id] & 0xf0;
                c_list[id] = (value & 0xf) | tmp;
        }
}

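/*
 * Example: starting from c_list[0] == 0x00, srpt_set_ioc(c_list, 1, 0x1)
 * yields c_list[0] == 0x10, and a subsequent srpt_set_ioc(c_list, 2, 0x2)
 * yields c_list[0] == 0x12. In other words, odd slots occupy the high
 * nibble and even slots the low nibble of each byte.
 */
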
/*
 * Write InfiniBand ClassPortInfo to mad. See also section 16.3.3.1
 * ClassPortInfo in the InfiniBand Architecture Specification.
 */
static void srpt_get_class_port_info(struct ib_dm_mad *mad)
{
        struct ib_class_port_info *cif;

        cif = (struct ib_class_port_info *)mad->data;
        memset(cif, 0, sizeof *cif);
        cif->base_version = 1;
        cif->class_version = 1;
        cif->resp_time_value = 20;

        mad->mad_hdr.status = 0;
}

/*
 * Write IOUnitInfo to mad. See also section 16.3.3.3 IOUnitInfo in the
 * InfiniBand Architecture Specification. See also section B.7,
 * table B.6 in the T10 SRP r16a document.
 */
static void srpt_get_iou(struct ib_dm_mad *mad)
{
        struct ib_dm_iou_info *ioui;
        u8 slot;
        int i;

        ioui = (struct ib_dm_iou_info *)mad->data;
        ioui->change_id = 1;
        ioui->max_controllers = 16;

        /* set present for slot 1 and empty for the rest */
        srpt_set_ioc(ioui->controller_list, 1, 1);
        for (i = 1, slot = 2; i < 16; i++, slot++)
                srpt_set_ioc(ioui->controller_list, slot, 0);

        mad->mad_hdr.status = 0;
}

/*
 * Write IOControllerProfile to mad for I/O controller (sdev, slot). See also
 * section 16.3.3.4 IOControllerProfile in the InfiniBand Architecture
 * Specification. See also section B.7, table B.7 in the T10 SRP r16a
 * document.
 */
static void srpt_get_ioc(struct srpt_device *sdev, u32 slot,
                         struct ib_dm_mad *mad)
{
        struct ib_dm_ioc_profile *iocp;

        iocp = (struct ib_dm_ioc_profile *)mad->data;

        if (!slot || slot > 16) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
                return;
        }

        if (slot > 2) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
                return;
        }

        memset(iocp, 0, sizeof *iocp);
        strcpy(iocp->id_string, MELLANOX_SRPT_ID_STRING);
        iocp->guid = cpu_to_be64(mellanox_ioc_guid);
        iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
        iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id);
        iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver);
        iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
        iocp->subsys_device_id = 0x0;
        iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
        iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
        iocp->protocol = cpu_to_be16(SRP_PROTOCOL);
        iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION);
        iocp->send_queue_depth = cpu_to_be16(SRPT_SRQ_SIZE);
        iocp->rdma_read_depth = 4;
        iocp->send_size = cpu_to_be32(MAX_MESSAGE_SIZE);
        iocp->rdma_size = cpu_to_be32(MAX_RDMA_SIZE);
        iocp->num_svc_entries = 1;
        iocp->op_cap_mask = SRP_SEND_TO_IOC | SRP_SEND_FROM_IOC |
            SRP_RDMA_READ_FROM_IOC | SRP_RDMA_WRITE_FROM_IOC;

        mad->mad_hdr.status = 0;
}

/*
 * Device management: write ServiceEntries to mad for the given slot. See also
 * section 16.3.3.5 ServiceEntries in the InfiniBand Architecture
 * Specification. See also section B.7, table B.8 in the T10 SRP r16a document.
 */
static void srpt_get_svc_entries(u16 slot, u8 hi, u8 lo, struct ib_dm_mad *mad)
{
        struct ib_dm_svc_entries *svc_entries;

        if (!slot || slot > 16) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
                return;
        }

        if (slot > 2 || lo > hi || hi > 1) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
                return;
        }

        svc_entries = (struct ib_dm_svc_entries *)mad->data;
        memset(svc_entries, 0, sizeof *svc_entries);
        svc_entries->service_entries[0].id = cpu_to_be64(mellanox_ioc_guid);
        snprintf(svc_entries->service_entries[0].name,
                 sizeof(svc_entries->service_entries[0].name),
                 "%s%016llx",
                 SRP_SERVICE_NAME_PREFIX,
                 (unsigned long long)mellanox_ioc_guid);

        mad->mad_hdr.status = 0;
}

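/*
 * The service name built above has the form <prefix><ioc-guid-in-hex>,
 * e.g. "SRP.T10:0002c90200402bd4" assuming the usual SRP_SERVICE_NAME_PREFIX
 * of "SRP.T10:". SRP initiators use this ServiceEntries name to locate the
 * SRP service behind a target port.
 */
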
/*
 * Process a MAD (InfiniBand management datagram) *rq_mad that was received
 * through source port *sp. The response to be sent back is written to
 * *rsp_mad.
 */
static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
                                 struct ib_dm_mad *rsp_mad)
{
        u16 attr_id;
        u32 slot;
        u8 hi, lo;

        attr_id = be16_to_cpu(rq_mad->mad_hdr.attr_id);
        switch (attr_id) {
        case DM_ATTR_CLASS_PORT_INFO:
                srpt_get_class_port_info(rsp_mad);
                break;
        case DM_ATTR_IOU_INFO:
                srpt_get_iou(rsp_mad);
                break;
        case DM_ATTR_IOC_PROFILE:
                slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
                srpt_get_ioc(sp->sdev, slot, rsp_mad);
                break;
        case DM_ATTR_SVC_ENTRIES:
                slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
                hi = (u8) ((slot >> 8) & 0xff);
                lo = (u8) (slot & 0xff);
                slot = (u16) ((slot >> 16) & 0xffff);
                srpt_get_svc_entries(slot, hi, lo, rsp_mad);
                break;
        default:
                rsp_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
                break;
        }
}

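/*
 * For DM_ATTR_SVC_ENTRIES the 32-bit attribute modifier decoded above is
 * laid out as follows: bits 31:16 contain the controller slot, bits 15:8
 * the index of the last requested service entry ('hi') and bits 7:0 the
 * index of the first requested service entry ('lo').
 */
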
/*
 * Callback function that is called by the InfiniBand core after transmission
 * of a MAD. (MAD = management datagram; AH = address handle.)
 */
static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
                                  struct ib_mad_send_wc *mad_wc)
{
        ib_destroy_ah(mad_wc->send_buf->ah);
        ib_free_send_mad(mad_wc->send_buf);
}

/*
 * Callback function that is called by the InfiniBand core after reception of
 * a MAD (management datagram).
 */
static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
                                  struct ib_mad_recv_wc *mad_wc)
{
        struct srpt_port *sport = (struct srpt_port *)mad_agent->context;
        struct ib_ah *ah;
        struct ib_mad_send_buf *rsp;
        struct ib_dm_mad *dm_mad;

        if (!mad_wc || !mad_wc->recv_buf.mad)
                return;

        ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc,
                                  mad_wc->recv_buf.grh, mad_agent->port_num);
        if (IS_ERR(ah))
                goto err;

        BUILD_BUG_ON(offsetof(struct ib_dm_mad, data) != IB_MGMT_DEVICE_HDR);

        rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp,
                                 mad_wc->wc->pkey_index, 0,
                                 IB_MGMT_DEVICE_HDR, IB_MGMT_DEVICE_DATA,
                                 GFP_KERNEL);
        if (IS_ERR(rsp))
                goto err_rsp;

        rsp->ah = ah;

        dm_mad = rsp->mad;
        memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof *dm_mad);
        dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
        dm_mad->mad_hdr.status = 0;

        switch (mad_wc->recv_buf.mad->mad_hdr.method) {
        case IB_MGMT_METHOD_GET:
                srpt_mgmt_method_get(sport, mad_wc->recv_buf.mad, dm_mad);
                break;
        case IB_MGMT_METHOD_SET:
                dm_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
                break;
        default:
                dm_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD);
                break;
        }

        if (!ib_post_send_mad(rsp, NULL)) {
                ib_free_recv_mad(mad_wc);
                /* will destroy_ah & free_send_mad in send completion */
                return;
        }

        ib_free_send_mad(rsp);

err_rsp:
        ib_destroy_ah(ah);
err:
        ib_free_recv_mad(mad_wc);
}

/*
 * Enable InfiniBand management datagram processing, update the cached sm_lid,
 * lid and gid values, and register a callback function for processing MADs
 * on the specified port. It is safe to call this function more than once for
 * the same port.
 */
static int srpt_refresh_port(struct srpt_port *sport)
{
        struct ib_mad_reg_req reg_req;
        struct ib_port_modify port_modify;
        struct ib_port_attr port_attr;
        int ret;

        memset(&port_modify, 0, sizeof port_modify);
        port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
        port_modify.clr_port_cap_mask = 0;

        ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
        if (ret)
                goto err_mod_port;

        ret = ib_query_port(sport->sdev->device, sport->port, &port_attr);
        if (ret)
                goto err_query_port;

        sport->sm_lid = port_attr.sm_lid;
        sport->lid = port_attr.lid;

        ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid);
        if (ret)
                goto err_query_port;

        if (!sport->mad_agent) {
                memset(&reg_req, 0, sizeof reg_req);
                reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
                reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION;
                set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
                set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);

                sport->mad_agent = ib_register_mad_agent(sport->sdev->device,
                                                         sport->port,
                                                         IB_QPT_GSI,
                                                         &reg_req, 0,
                                                         srpt_mad_send_handler,
                                                         srpt_mad_recv_handler,
                                                         sport);
                if (IS_ERR(sport->mad_agent)) {
                        ret = PTR_ERR(sport->mad_agent);
                        sport->mad_agent = NULL;
                        goto err_query_port;
                }
        }

        return 0;

err_query_port:
        port_modify.set_port_cap_mask = 0;
        port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
        ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);

err_mod_port:
        return ret;
}

/*
 * Unregister the callback function for processing MADs and disable MAD
 * processing for all ports of the specified device. It is safe to call this
 * function more than once for the same device.
 */
static void srpt_unregister_mad_agent(struct srpt_device *sdev)
{
        struct ib_port_modify port_modify = {
                .clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP,
        };
        struct srpt_port *sport;
        int i;

        for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
                sport = &sdev->port[i - 1];
                WARN_ON(sport->port != i);
                if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0)
                        printk(KERN_ERR PFX "disabling MAD processing"
                               " failed.\n");
                if (sport->mad_agent) {
                        ib_unregister_mad_agent(sport->mad_agent);
                        sport->mad_agent = NULL;
                }
        }
}

/*
 * Allocate and initialize an SRPT I/O context structure.
 */
static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev)
{
        struct srpt_ioctx *ioctx;

        ioctx = kmalloc(sizeof *ioctx, GFP_KERNEL);
        if (!ioctx)
                goto out;

        ioctx->buf = kzalloc(MAX_MESSAGE_SIZE, GFP_KERNEL);
        if (!ioctx->buf)
                goto out_free_ioctx;

        ioctx->dma = dma_map_single(sdev->device->dma_device, ioctx->buf,
                                    MAX_MESSAGE_SIZE, DMA_BIDIRECTIONAL);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)
        if (dma_mapping_error(sdev->device->dma_device, ioctx->dma))
#else
        if (dma_mapping_error(ioctx->dma))
#endif
                goto out_free_buf;

        return ioctx;

out_free_buf:
        kfree(ioctx->buf);
out_free_ioctx:
        kfree(ioctx);
out:
        return NULL;
}

/*
 * Deallocate an SRPT I/O context structure.
 */
static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
{
        if (!ioctx)
                return;

        dma_unmap_single(sdev->device->dma_device, ioctx->dma,
                         MAX_MESSAGE_SIZE, DMA_BIDIRECTIONAL);
        kfree(ioctx->buf);
        kfree(ioctx);
}

/*
 * Associate a ring of SRPT I/O context structures with the specified device.
 */
static int srpt_alloc_ioctx_ring(struct srpt_device *sdev)
{
        int i;

        for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
                sdev->ioctx_ring[i] = srpt_alloc_ioctx(sdev);

                if (!sdev->ioctx_ring[i])
                        goto err;

                sdev->ioctx_ring[i]->index = i;
        }

        return 0;

err:
        /* Note: >= 0, not > 0, so that ioctx_ring[0] is freed too. */
        while (--i >= 0) {
                srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
                sdev->ioctx_ring[i] = NULL;
        }
        return -ENOMEM;
}

/* Free the ring of SRPT I/O context structures. */
static void srpt_free_ioctx_ring(struct srpt_device *sdev)
{
        int i;

        for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
                srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
                sdev->ioctx_ring[i] = NULL;
        }
}

/*
 * Post a receive request on the SRQ (shared receive queue) of InfiniBand
 * device 'sdev'.
 */
static int srpt_post_recv(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
{
        struct ib_sge list;
        struct ib_recv_wr wr, *bad_wr;

        wr.wr_id = ioctx->index | SRPT_OP_RECV;

        list.addr = ioctx->dma;
        list.length = MAX_MESSAGE_SIZE;
        list.lkey = sdev->mr->lkey;

        wr.next = NULL;
        wr.sg_list = &list;
        wr.num_sge = 1;

        return ib_post_srq_recv(sdev->srq, &wr, &bad_wr);
}

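/*
 * Note on work request IDs: receive work requests are tagged by or-ing
 * SRPT_OP_RECV into wr_id, while send work requests use the bare ioctx
 * index. The completion handlers rely on this encoding both to map a work
 * completion back to its ioctx and to tell receives apart from sends.
 */
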
/*
 * Post a send request on the SRPT RDMA channel 'ch'.
 */
static int srpt_post_send(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
                          int len)
{
        struct ib_sge list;
        struct ib_send_wr wr, *bad_wr;
        struct srpt_device *sdev = ch->sport->sdev;

        dma_sync_single_for_device(sdev->device->dma_device, ioctx->dma,
                                   MAX_MESSAGE_SIZE, DMA_TO_DEVICE);

        list.addr = ioctx->dma;
        list.length = len;
        list.lkey = sdev->mr->lkey;

        wr.next = NULL;
        wr.wr_id = ioctx->index;
        wr.sg_list = &list;
        wr.num_sge = 1;
        wr.opcode = IB_WR_SEND;
        wr.send_flags = IB_SEND_SIGNALED;

        return ib_post_send(ch->qp, &wr, &bad_wr);
}

static int srpt_get_desc_tbl(struct srpt_ioctx *ioctx, struct srp_cmd *srp_cmd,
                             int *ind)
{
        struct srp_indirect_buf *idb;
        struct srp_direct_buf *db;

        *ind = 0;
        if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) ||
            ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) {
                ioctx->n_rbuf = 1;
                ioctx->rbufs = &ioctx->single_rbuf;

                db = (void *)srp_cmd->add_data;
                memcpy(ioctx->rbufs, db, sizeof *db);
                ioctx->data_len = be32_to_cpu(db->len);
        } else {
                idb = (void *)srp_cmd->add_data;

                ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof *db;

                if (ioctx->n_rbuf >
                    (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
                        *ind = 1;
                        ioctx->n_rbuf = 0;
                        goto out;
                }

                if (ioctx->n_rbuf == 1)
                        ioctx->rbufs = &ioctx->single_rbuf;
                else
                        ioctx->rbufs =
                                kmalloc(ioctx->n_rbuf * sizeof *db, GFP_ATOMIC);
                if (!ioctx->rbufs) {
                        ioctx->n_rbuf = 0;
                        return -ENOMEM;
                }

                db = idb->desc_list;
                memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof *db);
                ioctx->data_len = be32_to_cpu(idb->len);
        }
out:
        return 0;
}

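/*
 * The buf_fmt checks above follow the SRP_CMD layout: the lower four bits
 * of buf_fmt describe the data-in buffer descriptor format and the upper
 * four bits the data-out format (see also srpt_handle_cmd(), which derives
 * the SCST data direction from the same nibbles). SRP_DATA_DESC_DIRECT in
 * either nibble means add_data holds a single srp_direct_buf instead of an
 * indirect descriptor table.
 */
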
/*
 * Modify the attributes of queue pair 'qp': allow local write, remote read,
 * and remote write. Also transition 'qp' to state IB_QPS_INIT.
 */
static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
{
        struct ib_qp_attr *attr;
        int ret;

        attr = kzalloc(sizeof *attr, GFP_KERNEL);
        if (!attr)
                return -ENOMEM;

        attr->qp_state = IB_QPS_INIT;
        attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
            IB_ACCESS_REMOTE_WRITE;
        attr->port_num = ch->sport->port;
        attr->pkey_index = 0;

        ret = ib_modify_qp(qp, attr,
                           IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PORT |
                           IB_QP_PKEY_INDEX);

        kfree(attr);
        return ret;
}

static int srpt_ch_qp_rtr_rts(struct srpt_rdma_ch *ch, struct ib_qp *qp,
                              enum ib_qp_state qp_state)
{
        struct ib_qp_attr *qp_attr;
        int attr_mask;
        int ret;

        qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
        if (!qp_attr)
                return -ENOMEM;

        qp_attr->qp_state = qp_state;
        ret = ib_cm_init_qp_attr(ch->cm_id, qp_attr, &attr_mask);
        if (ret)
                goto out;

        if (qp_state == IB_QPS_RTR)
                qp_attr->max_dest_rd_atomic = 4;
        else
                qp_attr->max_rd_atomic = 4;

        ret = ib_modify_qp(qp, qp_attr, attr_mask);

out:
        kfree(qp_attr);
        return ret;
}

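/*
 * The INIT -> RTR -> RTS transitions performed via this helper follow the
 * standard IB connection sequence: ib_cm_init_qp_attr() fills in the
 * attributes the CM negotiated for the requested state, and the RDMA
 * read/atomic depths are capped at 4 to match the rdma_read_depth
 * advertised in the I/O controller profile (see srpt_get_ioc()).
 */
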
static void srpt_reset_ioctx(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
{
        int i;

        if (ioctx->n_rdma_ius > 0 && ioctx->rdma_ius) {
                struct rdma_iu *riu = ioctx->rdma_ius;

                for (i = 0; i < ioctx->n_rdma_ius; ++i, ++riu)
                        kfree(riu->sge);
                kfree(ioctx->rdma_ius);
        }

        if (ioctx->n_rbuf > 1)
                kfree(ioctx->rbufs);

        if (srpt_post_recv(ch->sport->sdev, ioctx))
                /* TODO: queue the ioctx back onto a free ioctx list. */
                printk(KERN_ERR PFX "SRQ post_recv failed - this is serious\n");
        else
                atomic_inc(&ch->req_lim_delta);
}

static void srpt_abort_scst_cmd(struct srpt_device *sdev,
                                struct scst_cmd *scmnd,
                                bool tell_initiator)
{
        scst_data_direction dir;

        dir = scst_cmd_get_data_direction(scmnd);
        if (dir != SCST_DATA_NONE) {
                dma_unmap_sg(sdev->device->dma_device,
                             scst_cmd_get_sg(scmnd),
                             scst_cmd_get_sg_cnt(scmnd),
                             scst_to_tgt_dma_dir(dir));

                if (scmnd->state == SCST_CMD_STATE_DATA_WAIT) {
                        scst_rx_data(scmnd,
                                     tell_initiator ? SCST_RX_STATUS_ERROR
                                     : SCST_RX_STATUS_ERROR_FATAL,
                                     SCST_CONTEXT_THREAD);
                        goto out;
                } else if (scmnd->state == SCST_CMD_STATE_XMIT_WAIT)
                        ; /* fall through: finish the command below */
        }

        scst_set_delivery_status(scmnd, SCST_CMD_DELIVERY_FAILED);
        scst_tgt_cmd_done(scmnd, scst_estimate_context());
out:
        return;
}

static void srpt_handle_err_comp(struct srpt_rdma_ch *ch, struct ib_wc *wc)
{
        struct srpt_ioctx *ioctx;
        struct srpt_device *sdev = ch->sport->sdev;

        if (wc->wr_id & SRPT_OP_RECV) {
                ioctx = sdev->ioctx_ring[wc->wr_id & ~SRPT_OP_RECV];
                printk(KERN_ERR PFX "This is serious - SRQ is in bad state\n");
        } else {
                ioctx = sdev->ioctx_ring[wc->wr_id];

                if (ioctx->scmnd)
                        srpt_abort_scst_cmd(sdev, ioctx->scmnd, true);
                else
                        srpt_reset_ioctx(ch, ioctx);
        }
}

static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
                                  struct srpt_ioctx *ioctx,
                                  enum scst_exec_context context)
{
        if (ioctx->scmnd) {
                scst_data_direction dir =
                        scst_cmd_get_data_direction(ioctx->scmnd);

                if (dir != SCST_DATA_NONE)
                        dma_unmap_sg(ch->sport->sdev->device->dma_device,
                                     scst_cmd_get_sg(ioctx->scmnd),
                                     scst_cmd_get_sg_cnt(ioctx->scmnd),
                                     scst_to_tgt_dma_dir(dir));

                scst_tgt_cmd_done(ioctx->scmnd, context);
        } else
                srpt_reset_ioctx(ch, ioctx);
}

static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
                                  struct srpt_ioctx *ioctx)
{
        if (!ioctx->scmnd) {
                srpt_reset_ioctx(ch, ioctx);
                return;
        }

        if (scst_cmd_get_data_direction(ioctx->scmnd) == SCST_DATA_WRITE)
                scst_rx_data(ioctx->scmnd, SCST_RX_STATUS_SUCCESS,
                        scst_estimate_context());
}

static void srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
                               struct srpt_ioctx *ioctx, u8 s_key, u8 s_code,
                               u64 tag)
{
        struct srp_rsp *srp_rsp;
        struct sense_data *sense;
        int limit_delta;

        srp_rsp = ioctx->buf;
        memset(srp_rsp, 0, sizeof *srp_rsp);

        limit_delta = atomic_read(&ch->req_lim_delta);
        atomic_sub(limit_delta, &ch->req_lim_delta);

        srp_rsp->opcode = SRP_RSP;
        srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
        srp_rsp->tag = tag;

        if (s_key != NO_SENSE) {
                srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
                srp_rsp->status = SAM_STAT_CHECK_CONDITION;
                srp_rsp->sense_data_len =
                    cpu_to_be32(sizeof *sense + (sizeof *sense % 4));

                sense = (struct sense_data *)(srp_rsp + 1);
                sense->err_code = 0x70;
                sense->key = s_key;
                sense->asc_ascq = s_code;
        }
}

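/*
 * Flow control note: req_lim_delta accumulates one credit for every receive
 * buffer reposted to the SRQ (see srpt_reset_ioctx()). Each SRP response
 * returns the accumulated delta to the initiator in the req_lim_delta field
 * and resets the counter, which is how the target grants the initiator new
 * request slots.
 */
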
static void srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
                                   struct srpt_ioctx *ioctx, u8 rsp_code,
                                   u64 tag)
{
        struct srp_rsp *srp_rsp;
        int limit_delta;

        dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
                                MAX_MESSAGE_SIZE, DMA_TO_DEVICE);

        srp_rsp = ioctx->buf;
        memset(srp_rsp, 0, sizeof *srp_rsp);

        limit_delta = atomic_read(&ch->req_lim_delta);
        atomic_sub(limit_delta, &ch->req_lim_delta);

        srp_rsp->opcode = SRP_RSP;
        srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
        srp_rsp->tag = tag;

        if (rsp_code != SRP_TSK_MGMT_SUCCESS) {
                srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID;
                srp_rsp->resp_data_len = cpu_to_be32(4);
                srp_rsp->data[3] = rsp_code;
        }
}

/*
 * Process SRP_CMD.
 */
static int srpt_handle_cmd(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
{
        struct scst_cmd *scmnd = NULL;
        struct srp_cmd *srp_cmd = NULL;
        scst_data_direction dir = SCST_DATA_NONE;
        int indirect_desc = 0;
        int ret;
        unsigned long flags;

        srp_cmd = ioctx->buf;

        if (srp_cmd->buf_fmt) {
                ret = srpt_get_desc_tbl(ioctx, srp_cmd, &indirect_desc);
                if (ret) {
                        srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
                                           NO_ADD_SENSE, srp_cmd->tag);
                        ((struct srp_rsp *)ioctx->buf)->status =
                                        SAM_STAT_TASK_SET_FULL;
                        goto send_rsp;
                }

                if (indirect_desc) {
                        srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
                                           NO_ADD_SENSE, srp_cmd->tag);
                        ((struct srp_rsp *)ioctx->buf)->status =
                                        SAM_STAT_TASK_SET_FULL;
                        goto send_rsp;
                }

                if (srp_cmd->buf_fmt & 0xf)
                        dir = SCST_DATA_READ;
                else if (srp_cmd->buf_fmt >> 4)
                        dir = SCST_DATA_WRITE;
                else
                        dir = SCST_DATA_NONE;
        } else
                dir = SCST_DATA_NONE;

        scmnd = scst_rx_cmd(ch->scst_sess, (u8 *) &srp_cmd->lun,
                            sizeof srp_cmd->lun, srp_cmd->cdb, 16,
                            thread ? SCST_NON_ATOMIC : SCST_ATOMIC);
        if (!scmnd) {
                srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
                                   NO_ADD_SENSE, srp_cmd->tag);
                ((struct srp_rsp *)ioctx->buf)->status =
                        SAM_STAT_TASK_SET_FULL;
                goto send_rsp;
        }

        ioctx->scmnd = scmnd;

        switch (srp_cmd->task_attr) {
        case SRP_CMD_HEAD_OF_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_HEAD_OF_QUEUE;
                break;
        case SRP_CMD_ORDERED_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
                break;
        case SRP_CMD_SIMPLE_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_SIMPLE;
                break;
        case SRP_CMD_ACA:
                scmnd->queue_type = SCST_CMD_QUEUE_ACA;
                break;
        default:
                scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
                break;
        }

        scst_cmd_set_tag(scmnd, srp_cmd->tag);
        scst_cmd_set_tgt_priv(scmnd, ioctx);
        scst_cmd_set_expected(scmnd, dir, ioctx->data_len);

        spin_lock_irqsave(&ch->spinlock, flags);
        list_add_tail(&ioctx->scmnd_list, &ch->active_scmnd_list);
        ch->active_scmnd_cnt++;
        spin_unlock_irqrestore(&ch->spinlock, flags);

        scst_cmd_init_done(scmnd, scst_estimate_context());

        return 0;

send_rsp:
        return -1;
}

/*
 * Process SRP_TSK_MGMT. See also table 19 in the T10 SRP r16a document.
 */
static int srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
                                struct srpt_ioctx *ioctx)
{
        struct srp_tsk_mgmt *srp_tsk = NULL;
        struct srpt_mgmt_ioctx *mgmt_ioctx;
        int ret;

        srp_tsk = ioctx->buf;

        printk(KERN_WARNING PFX
               "recv_tsk_mgmt= %d for task_tag= %lld"
               " using tag= %lld cm_id= %p sess= %p\n",
               srp_tsk->tsk_mgmt_func,
               (unsigned long long) srp_tsk->task_tag,
               (unsigned long long) srp_tsk->tag,
               ch->cm_id, ch->scst_sess);

        mgmt_ioctx = kmalloc(sizeof *mgmt_ioctx, GFP_ATOMIC);
        if (!mgmt_ioctx) {
                srpt_build_tskmgmt_rsp(ch, ioctx, SRP_TSK_MGMT_FAILED,
                                       srp_tsk->tag);
                goto send_rsp;
        }

        mgmt_ioctx->ioctx = ioctx;
        mgmt_ioctx->ch = ch;
        mgmt_ioctx->tag = srp_tsk->tag;

        switch (srp_tsk->tsk_mgmt_func) {
        case SRP_TSK_ABORT_TASK:
                ret = scst_rx_mgmt_fn_tag(ch->scst_sess,
                                          SCST_ABORT_TASK,
                                          srp_tsk->task_tag,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_ABORT_TASK_SET:
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_ABORT_TASK_SET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_CLEAR_TASK_SET:
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_CLEAR_TASK_SET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
#if 0
        case SRP_TSK_LUN_RESET:
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_LUN_RESET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
#endif
        case SRP_TSK_CLEAR_ACA:
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_CLEAR_ACA,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        default:
                srpt_build_tskmgmt_rsp(ch, ioctx,
                                       SRP_TSK_MGMT_FUNC_NOT_SUPP,
                                       srp_tsk->tag);
                goto send_rsp;
        }
        return 0;

send_rsp:
        return -1;
}

static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
                               struct srpt_ioctx *ioctx)
{
        u8 op;
        unsigned long flags;

        if (ch->state != RDMA_CHANNEL_LIVE) {
                if (ch->state == RDMA_CHANNEL_CONNECTING) {
                        spin_lock_irqsave(&ch->spinlock, flags);
                        list_add_tail(&ioctx->wait_list, &ch->cmd_wait_list);
                        spin_unlock_irqrestore(&ch->spinlock, flags);
                } else
                        srpt_reset_ioctx(ch, ioctx);

                return;
        }

        dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
                                MAX_MESSAGE_SIZE, DMA_FROM_DEVICE);

        ioctx->data_len = 0;
        ioctx->n_rbuf = 0;
        ioctx->rbufs = NULL;
        ioctx->n_rdma = 0;
        ioctx->n_rdma_ius = 0;
        ioctx->rdma_ius = NULL;
        ioctx->scmnd = NULL;

        op = *(u8 *) ioctx->buf;
        switch (op) {
        case SRP_CMD:
                if (srpt_handle_cmd(ch, ioctx) < 0)
                        goto send_rsp;
                break;

        case SRP_TSK_MGMT:
                if (srpt_handle_tsk_mgmt(ch, ioctx) < 0)
                        goto send_rsp;
                break;

        case SRP_I_LOGOUT:
        case SRP_AER_REQ:
        default:
                srpt_build_cmd_rsp(ch, ioctx, ILLEGAL_REQUEST, INVALID_CDB,
                                   ((struct srp_cmd *)ioctx->buf)->tag);

                goto send_rsp;
        }

        dma_sync_single_for_device(ch->sport->sdev->device->dma_device,
                                   ioctx->dma, MAX_MESSAGE_SIZE,
                                   DMA_FROM_DEVICE);

        return;

send_rsp:
        if (ch->state != RDMA_CHANNEL_LIVE ||
            srpt_post_send(ch, ioctx,
                           sizeof(struct srp_rsp) +
                           be32_to_cpu(((struct srp_rsp *)ioctx->buf)->
                                       sense_data_len)))
                srpt_reset_ioctx(ch, ioctx);
}

/*
 * Returns true if the ioctx list is non-empty or if the ib_srpt kernel thread
 * should stop.
 * @pre thread != 0
 */
static inline int srpt_test_ioctx_list(void)
{
        int res = (!list_empty(&srpt_thread.thread_ioctx_list) ||
                   unlikely(kthread_should_stop()));
        return res;
}

/*
 * Add 'ioctx' to the tail of the ioctx list and wake up the kernel thread.
 *
 * @pre thread != 0
 */
static inline void srpt_schedule_thread(struct srpt_ioctx *ioctx)
{
        unsigned long flags;

        spin_lock_irqsave(&srpt_thread.thread_lock, flags);
        list_add_tail(&ioctx->comp_list, &srpt_thread.thread_ioctx_list);
        spin_unlock_irqrestore(&srpt_thread.thread_lock, flags);
        wake_up(&ioctx_list_waitQ);
}

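/*
 * The ib_srpt kernel thread itself is outside this excerpt; presumably it
 * sleeps on ioctx_list_waitQ with srpt_test_ioctx_list() as its wait
 * condition and drains the thread_ioctx_list entries queued here.
 */
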
/*
 * InfiniBand CQ (completion queue) completion callback: request further
 * completion notifications, then poll the CQ and dispatch each work
 * completion. May be called in interrupt context.
 */
static void srpt_completion(struct ib_cq *cq, void *ctx)
{
        struct srpt_rdma_ch *ch = ctx;
        struct srpt_device *sdev = ch->sport->sdev;
        struct ib_wc wc;
        struct srpt_ioctx *ioctx;

        ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
        while (ib_poll_cq(ch->cq, 1, &wc) > 0) {
                if (wc.status) {
                        printk(KERN_ERR PFX "failed %s status= %d\n",
                               wc.wr_id & SRPT_OP_RECV ? "receive" : "send",
                               wc.status);
                        srpt_handle_err_comp(ch, &wc);
                        break;
                }

                if (wc.wr_id & SRPT_OP_RECV) {
                        ioctx = sdev->ioctx_ring[wc.wr_id & ~SRPT_OP_RECV];
                        if (thread) {
                                ioctx->ch = ch;
                                ioctx->op = IB_WC_RECV;
                                srpt_schedule_thread(ioctx);
                        } else
                                srpt_handle_new_iu(ch, ioctx);
                        continue;
                } else
                        ioctx = sdev->ioctx_ring[wc.wr_id];

                if (thread) {
                        ioctx->ch = ch;
                        ioctx->op = wc.opcode;
                        srpt_schedule_thread(ioctx);
                } else {
                        switch (wc.opcode) {
                        case IB_WC_SEND:
                                srpt_handle_send_comp(ch, ioctx,
                                        scst_estimate_context());
                                break;
                        case IB_WC_RDMA_WRITE:
                        case IB_WC_RDMA_READ:
                                srpt_handle_rdma_comp(ch, ioctx);
                                break;
                        default:
                                break;
                        }
                }
        }
}

/*
 * Create a completion queue and a queue pair for the specified RDMA channel.
 */
static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
{
        struct ib_qp_init_attr *qp_init;
        struct srpt_device *sdev = ch->sport->sdev;
        int cqe;
        int ret;

        qp_init = kzalloc(sizeof *qp_init, GFP_KERNEL);
        if (!qp_init)
                return -ENOMEM;

        /* Create a completion queue (CQ). */

        cqe = SRPT_RQ_SIZE + SRPT_SQ_SIZE - 1;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && !defined(RHEL_RELEASE_CODE)
        ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe);
#else
        ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe, 0);
#endif
        if (IS_ERR(ch->cq)) {
                ret = PTR_ERR(ch->cq);
                printk(KERN_ERR PFX "failed to create_cq cqe= %d ret= %d\n",
                        cqe, ret);
                goto out;
        }

        /* Request completion notification. */

        ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);

        /* Create a queue pair (QP). */

        qp_init->qp_context = (void *)ch;
        qp_init->event_handler = srpt_qp_event;
        qp_init->send_cq = ch->cq;
        qp_init->recv_cq = ch->cq;
        qp_init->srq = sdev->srq;
        qp_init->sq_sig_type = IB_SIGNAL_REQ_WR;
        qp_init->qp_type = IB_QPT_RC;
        qp_init->cap.max_send_wr = SRPT_SQ_SIZE;
        qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE;

        ch->qp = ib_create_qp(sdev->pd, qp_init);
        if (IS_ERR(ch->qp)) {
                ret = PTR_ERR(ch->qp);
                ib_destroy_cq(ch->cq);
                printk(KERN_ERR PFX "failed to create_qp ret= %d\n", ret);
                goto out;
        }

        printk(KERN_DEBUG PFX "%s: max_cqe= %d max_sge= %d cm_id= %p\n",
               __func__, ch->cq->cqe, qp_init->cap.max_send_sge,
               ch->cm_id);

        /* Modify the attributes and the state of queue pair ch->qp. */

        ret = srpt_init_ch_qp(ch, ch->qp);
        if (ret) {
                ib_destroy_qp(ch->qp);
                ib_destroy_cq(ch->cq);
                goto out;
        }

        atomic_set(&ch->req_lim_delta, SRPT_RQ_SIZE);
out:
        kfree(qp_init);
        return ret;
}

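/*
 * Note: a single CQ serves both the send and the receive side of the
 * channel (send_cq == recv_cq == ch->cq), which is why the CQ above is
 * sized to SRPT_RQ_SIZE + SRPT_SQ_SIZE - 1 entries.
 */
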
static struct srpt_rdma_ch *srpt_find_channel(struct ib_cm_id *cm_id)
{
        struct srpt_device *sdev = cm_id->context;
        struct srpt_rdma_ch *ch, *tmp_ch;

        spin_lock_irq(&sdev->spinlock);
        list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
                if (ch->cm_id == cm_id) {
                        spin_unlock_irq(&sdev->spinlock);
                        return ch;
                }
        }

        spin_unlock_irq(&sdev->spinlock);

        return NULL;
}

/** Release all resources associated with the specified RDMA channel. */
static int srpt_release_channel(struct srpt_rdma_ch *ch, int destroy_cmid)
{
        TRACE_ENTRY();

        spin_lock_irq(&ch->sport->sdev->spinlock);
        list_del(&ch->list);
        spin_unlock_irq(&ch->sport->sdev->spinlock);

        if (ch->cm_id && destroy_cmid) {
                printk(KERN_WARNING PFX
                       "%s: destroy cm_id= %p\n", __func__, ch->cm_id);
                ib_destroy_cm_id(ch->cm_id);
                ch->cm_id = NULL;
        }

        ib_destroy_qp(ch->qp);
        ib_destroy_cq(ch->cq);

        if (ch->scst_sess) {
                struct srpt_ioctx *ioctx, *ioctx_tmp;

                printk(KERN_WARNING PFX
                       "%s: release sess= %p sess_name= %s active_cmd= %d\n",
                       __func__, ch->scst_sess, ch->sess_name,
                       ch->active_scmnd_cnt);

                spin_lock_irq(&ch->spinlock);
                list_for_each_entry_safe(ioctx, ioctx_tmp,
                                         &ch->active_scmnd_list, scmnd_list) {
                        spin_unlock_irq(&ch->spinlock);

                        if (ioctx->scmnd)
                                srpt_abort_scst_cmd(ch->sport->sdev,
                                                    ioctx->scmnd, true);

                        spin_lock_irq(&ch->spinlock);
                }
                WARN_ON(!list_empty(&ch->active_scmnd_list));
                WARN_ON(ch->active_scmnd_cnt != 0);
                spin_unlock_irq(&ch->spinlock);

                scst_unregister_session(ch->scst_sess, 0, NULL);
                ch->scst_sess = NULL;
        }

        kfree(ch);

        TRACE_EXIT_RES(!destroy_cmid);

        return destroy_cmid ? 0 : 1;
}

static int srpt_disconnect_channel(struct srpt_rdma_ch *ch, int dreq)
{
        spin_lock_irq(&ch->spinlock);
        ch->state = RDMA_CHANNEL_DISCONNECTING;
        spin_unlock_irq(&ch->spinlock);

        if (dreq)
                ib_send_cm_dreq(ch->cm_id, NULL, 0);
        else
                ib_send_cm_drep(ch->cm_id, NULL, 0);

        return 0;
}

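/*
 * Per the IB CM disconnect handshake, a nonzero 'dreq' makes this side
 * initiate the disconnect by sending a DREQ message; otherwise the function
 * answers a peer-initiated DREQ with a DREP.
 */
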
1425 static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
1426                             struct ib_cm_req_event_param *param,
1427                             void *private_data)
1428 {
1429         struct srpt_device *sdev = cm_id->context;
1430         struct srp_login_req *req;
1431         struct srp_login_rsp *rsp;
1432         struct srp_login_rej *rej;
1433         struct ib_cm_rep_param *rep_param;
1434         struct srpt_rdma_ch *ch, *tmp_ch;
1435         u32 it_iu_len;
1436         int ret = 0;
1437
1438         if (!sdev || !private_data)
1439                 return -EINVAL;
1440
1441         rsp = kzalloc(sizeof *rsp, GFP_KERNEL);
1442         rej = kzalloc(sizeof *rej, GFP_KERNEL);
1443         rep_param = kzalloc(sizeof *rep_param, GFP_KERNEL);
1444
1445         if (!rsp || !rej || !rep_param) {
1446                 ret = -ENOMEM;
1447                 goto out;
1448         }
1449
1450         req = (struct srp_login_req *)private_data;
1451
1452         it_iu_len = be32_to_cpu(req->req_it_iu_len);
1453
1454         printk(KERN_DEBUG PFX
1455                "Host login i_port_id=0x%llx:0x%llx t_port_id=0x%llx:0x%llx"
1456                " it_iu_len=%d\n",
1457                (unsigned long long)
1458                be64_to_cpu(*(u64 *)&req->initiator_port_id[0]),
1459                (unsigned long long)
1460                be64_to_cpu(*(u64 *)&req->initiator_port_id[8]),
1461                (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[0]),
1462                (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[8]),
1463                it_iu_len);
1464
1465         if (it_iu_len > MAX_MESSAGE_SIZE || it_iu_len < 64) {
1466                 rej->reason =
1467                     cpu_to_be32(SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
1468                 ret = -EINVAL;
1469                 printk(KERN_WARNING PFX
1470                        "Reject invalid it_iu_len=%d\n", it_iu_len);
1471                 goto reject;
1472         }
1473
1474         if ((req->req_flags & 0x3) == SRP_MULTICHAN_SINGLE) {
1475                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;
1476
1477                 spin_lock_irq(&sdev->spinlock);
1478
1479                 list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
1480                         if (!memcmp(ch->i_port_id, req->initiator_port_id, 16)
1481                             && !memcmp(ch->t_port_id, req->target_port_id, 16)
1482                             && param->port == ch->sport->port
1483                             && param->listen_id == ch->sport->sdev->cm_id
1484                             && ch->cm_id) {
1485                                 /* found an existing channel */
1486                                 printk(KERN_WARNING PFX
1487                                        "Found existing channel name= %s"
1488                                        " cm_id= %p state= %d\n",
1489                                        ch->sess_name, ch->cm_id, ch->state);
1490
1491                                 spin_unlock_irq(&sdev->spinlock);
1492
1493                                 rsp->rsp_flags =
1494                                     SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
1495
1496                                 if (ch->state == RDMA_CHANNEL_LIVE)
1497                                         srpt_disconnect_channel(ch, 1);
1498                                 else if (ch->state == RDMA_CHANNEL_CONNECTING) {
1499                                         ib_send_cm_rej(ch->cm_id,
1500                                                        IB_CM_REJ_NO_RESOURCES,
1501                                                        NULL, 0, NULL, 0);
1502                                         srpt_release_channel(ch, 1);
1503                                 }
1504
1505                                 spin_lock_irq(&sdev->spinlock);
1506                         }
1507                 }
1508
1509                 spin_unlock_irq(&sdev->spinlock);
1510
1511         } else
1512                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
1513
1514         if (((u64) (*(u64 *) req->target_port_id) !=
1515              cpu_to_be64(mellanox_ioc_guid)) ||
1516             ((u64) (*(u64 *) (req->target_port_id + 8)) !=
1517              cpu_to_be64(mellanox_ioc_guid))) {
1518                 rej->reason =
1519                     cpu_to_be32(SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
1520                 ret = -EINVAL;
1521                 printk(KERN_WARNING PFX "Rejecting login: invalid target_port_id\n");
1522                 goto reject;
1523         }
1524
1525         ch = kzalloc(sizeof *ch, GFP_KERNEL);
1526         if (!ch) {
1527                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1528                 printk(KERN_WARNING PFX "Rejecting login: failed to allocate rdma_ch\n");
1529                 ret = -ENOMEM;
1530                 goto reject;
1531         }
1532
1533         spin_lock_init(&ch->spinlock);
1534         memcpy(ch->i_port_id, req->initiator_port_id, 16);
1535         memcpy(ch->t_port_id, req->target_port_id, 16);
1536         ch->sport = &sdev->port[param->port - 1];
1537         ch->cm_id = cm_id;
1538         ch->state = RDMA_CHANNEL_CONNECTING;
1539         INIT_LIST_HEAD(&ch->cmd_wait_list);
1540         INIT_LIST_HEAD(&ch->active_scmnd_list);
1541
1542         ret = srpt_create_ch_ib(ch);
1543         if (ret) {
1544                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1545                 printk(KERN_WARNING PFX "Rejecting login: failed to create rdma_ch\n");
1546                 goto free_ch;
1547         }
1548
1549         ret = srpt_ch_qp_rtr_rts(ch, ch->qp, IB_QPS_RTR);
1550         if (ret) {
1551                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1552                 printk(KERN_WARNING PFX
1553                        "Rejecting login: failed to transition QP to RTR (ret=%d)\n", ret);
1554                 goto destroy_ib;
1555         }
1556
1557         snprintf(ch->sess_name, sizeof(ch->sess_name),
1558                  "0x%016llx%016llx",
1559                  (unsigned long long)be64_to_cpu(*(u64 *)ch->i_port_id),
1560                  (unsigned long long)be64_to_cpu(*(u64 *)(ch->i_port_id + 8)));
1561
1562         TRACE_DBG("registering session %s", ch->sess_name);
1563
1564         BUG_ON(!sdev->scst_tgt);
1565         ch->scst_sess = scst_register_session(sdev->scst_tgt, 0, ch->sess_name,
1566                                   NULL, NULL);
1567         if (!ch->scst_sess) {
1568                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1569                 printk(KERN_WARNING PFX "Failed to create SCST session\n");
                     ret = -ENOMEM; /* report the failure instead of returning 0 */
1570                 goto destroy_ib;
1571         }
1572
1573         spin_lock_irq(&sdev->spinlock);
1574         list_add_tail(&ch->list, &sdev->rch_list);
1575         spin_unlock_irq(&sdev->spinlock);
1576
1577         printk(KERN_DEBUG PFX "Establish connection sess=%p name=%s cm_id=%p\n",
1578                ch->scst_sess, ch->sess_name, ch->cm_id);
1579
1580         scst_sess_set_tgt_priv(ch->scst_sess, ch);
1581
1582         /* create srp_login_response */
1583         rsp->opcode = SRP_LOGIN_RSP;
1584         rsp->tag = req->tag;
1585         rsp->max_it_iu_len = req->req_it_iu_len;
1586         rsp->max_ti_iu_len = req->req_it_iu_len;
1587         rsp->buf_fmt =
1588             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
1589         rsp->req_lim_delta = cpu_to_be32(SRPT_RQ_SIZE);
1590         atomic_set(&ch->req_lim_delta, 0);
1591
1592         /* create cm reply */
1593         rep_param->qp_num = ch->qp->qp_num;
1594         rep_param->private_data = (void *)rsp;
1595         rep_param->private_data_len = sizeof *rsp;
1596         rep_param->rnr_retry_count = 7;
1597         rep_param->flow_control = 1;
1598         rep_param->failover_accepted = 0;
1599         rep_param->srq = 1;
1600         rep_param->responder_resources = 4;
1601         rep_param->initiator_depth = 4;
1602
1603         ret = ib_send_cm_rep(cm_id, rep_param);
1604         if (ret)
1605                 srpt_release_channel(ch, 0);
1606
1607         goto out;
1608
1609 destroy_ib:
1610         ib_destroy_qp(ch->qp);
1611         ib_destroy_cq(ch->cq);
1612
1613 free_ch:
1614         kfree(ch);
1615
1616 reject:
1617         rej->opcode = SRP_LOGIN_REJ;
1618         rej->tag = req->tag;
1619         rej->buf_fmt =
1620             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
1621
1622         ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
1623                              (void *)rej, sizeof *rej);
1624
1625 out:
1626         kfree(rep_param);
1627         kfree(rsp);
1628         kfree(rej);
1629
1630         return ret;
1631 }
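     /*
      * Summary of the login flow handled by srpt_cm_req_recv() (sketch;
      * the SRP information units travel as CM private data):
      *
      *     initiator                            target (this function)
      *     CM REQ + SRP_LOGIN_REQ  --------->   validate it_iu_len and
      *                                          target_port_id, set up
      *                                          QP/CQ, register SCST session
      *                             <---------   CM REP + SRP_LOGIN_RSP (accept)
      *                             <---------   CM REJ + SRP_LOGIN_REJ (reject)
      */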
1632
1633 static int srpt_find_and_release_channel(struct ib_cm_id *cm_id)
1634 {
1635         struct srpt_rdma_ch *ch;
1636
1637         ch = srpt_find_channel(cm_id);
1638         if (!ch)
1639                 return -EINVAL;
1640
1641         return srpt_release_channel(ch, 0);
1642 }
1643
1644 static int srpt_cm_rej_recv(struct ib_cm_id *cm_id)
1645 {
1646         printk(KERN_DEBUG PFX "%s: cm_id=%p\n", __func__, cm_id);
1647         return srpt_find_and_release_channel(cm_id);
1648 }
1649
1650 static int srpt_cm_rtu_recv(struct ib_cm_id *cm_id)
1651 {
1652         struct srpt_rdma_ch *ch;
1653         int ret;
1654
1655         ch = srpt_find_channel(cm_id);
1656         if (!ch)
1657                 return -EINVAL;
1658
1659         if (ch->state == RDMA_CHANNEL_CONNECTING) {
1660                 struct srpt_ioctx *ioctx, *ioctx_tmp;
1661
1662                 spin_lock_irq(&ch->spinlock);
1663                 ch->state = RDMA_CHANNEL_LIVE;
1664                 spin_unlock_irq(&ch->spinlock);
1665                 ret = srpt_ch_qp_rtr_rts(ch, ch->qp, IB_QPS_RTS);
1666
1667                 list_for_each_entry_safe(ioctx, ioctx_tmp, &ch->cmd_wait_list,
1668                                          wait_list) {
1669                         list_del(&ioctx->wait_list);
1670                         srpt_handle_new_iu(ch, ioctx);
1671                 }
1672         } else if (ch->state == RDMA_CHANNEL_DISCONNECTING)
1673                 ret = -EAGAIN;
1674         else
1675                 ret = 0;
1676
1677         if (ret) {
1678                 printk(KERN_ERR PFX "cm_id=%p sess_name=%s state=%d\n",
1679                        cm_id, ch->sess_name, ch->state);
1680                 srpt_disconnect_channel(ch, 1);
1681         }
1682
1683         return ret;
1684 }
1685
1686 static int srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
1687 {
1688         printk(KERN_DEBUG PFX "%s: cm_id=%p\n", __func__, cm_id);
1689         return srpt_find_and_release_channel(cm_id);
1690 }
1691
1692 static int srpt_cm_rep_error(struct ib_cm_id *cm_id)
1693 {
1694         printk(KERN_DEBUG PFX "%s: cm_id=%p\n", __func__, cm_id);
1695         return srpt_find_and_release_channel(cm_id);
1696 }
1697
1698 static int srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
1699 {
1700         struct srpt_rdma_ch *ch;
1701         int ret = 0;
1702
1703         ch = srpt_find_channel(cm_id);
1704
1705         if (!ch)
1706                 return -EINVAL;
1707
1708         printk(KERN_DEBUG PFX "%s: cm_id= %p ch->state= %d\n",
1709                  __func__, cm_id, ch->state);
1710
1711         switch (ch->state) {
1712         case RDMA_CHANNEL_LIVE:
1713         case RDMA_CHANNEL_CONNECTING:
1714                 ret = srpt_disconnect_channel(ch, 0);
1715                 break;
1716         case RDMA_CHANNEL_DISCONNECTING:
1717         default:
1718                 break;
1719         }
1720
1721         return ret;
1722 }
1723
1724 static int srpt_cm_drep_recv(struct ib_cm_id *cm_id)
1725 {
1726         printk(KERN_DEBUG PFX "%s: cm_id=%p\n", __func__, cm_id);
1727         return srpt_find_and_release_channel(cm_id);
1728 }
1729
1730 static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
1731 {
1732         int ret = 0;
1733
1734         switch (event->event) {
1735         case IB_CM_REQ_RECEIVED:
1736                 ret = srpt_cm_req_recv(cm_id, &event->param.req_rcvd,
1737                                        event->private_data);
1738                 break;
1739         case IB_CM_REJ_RECEIVED:
1740                 ret = srpt_cm_rej_recv(cm_id);
1741                 break;
1742         case IB_CM_RTU_RECEIVED:
1743         case IB_CM_USER_ESTABLISHED:
1744                 ret = srpt_cm_rtu_recv(cm_id);
1745                 break;
1746         case IB_CM_DREQ_RECEIVED:
1747                 ret = srpt_cm_dreq_recv(cm_id);
1748                 break;
1749         case IB_CM_DREP_RECEIVED:
1750                 ret = srpt_cm_drep_recv(cm_id);
1751                 break;
1752         case IB_CM_TIMEWAIT_EXIT:
1753                 ret = srpt_cm_timewait_exit(cm_id);
1754                 break;
1755         case IB_CM_REP_ERROR:
1756                 ret = srpt_cm_rep_error(cm_id);
1757                 break;
1758         default:
1759                 break;
1760         }
1761
1762         return ret;
1763 }
1764
1765 static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1766                                  struct srpt_ioctx *ioctx,
1767                                  struct scst_cmd *scmnd)
1768 {
1769         struct scatterlist *scat;
1770         scst_data_direction dir;
1771         struct rdma_iu *riu;
1772         struct srp_direct_buf *db;
1773         dma_addr_t dma_addr;
1774         struct ib_sge *sge;
1775         u64 raddr;
1776         u32 rsize;
1777         u32 tsize;
1778         u32 dma_len;
1779         int count, nrdma;
1780         int i, j, k;
1781
1782         scat = scst_cmd_get_sg(scmnd);
1783         dir = scst_cmd_get_data_direction(scmnd);
1784         count = dma_map_sg(ch->sport->sdev->device->dma_device, scat,
1785                            scst_cmd_get_sg_cnt(scmnd),
1786                            scst_to_tgt_dma_dir(dir));
1787         if (unlikely(!count))
1788                 return -EBUSY;
1789
1790         if (ioctx->rdma_ius && ioctx->n_rdma_ius)
1791                 nrdma = ioctx->n_rdma_ius;
1792         else {
1793                 nrdma = count / SRPT_DEF_SG_PER_WQE + ioctx->n_rbuf;
1794
1795                 ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu,
1796                                           scst_cmd_atomic(scmnd)
1797                                           ? GFP_ATOMIC : GFP_KERNEL);
1798                 if (!ioctx->rdma_ius) {
1799                         dma_unmap_sg(ch->sport->sdev->device->dma_device,
1800                                      scat, scst_cmd_get_sg_cnt(scmnd),
1801                                      scst_to_tgt_dma_dir(dir));
1802                         return -ENOMEM;
1803                 }
1804
1805                 ioctx->n_rdma_ius = nrdma;
1806         }
1807
1808         db = ioctx->rbufs;
1809         tsize = (dir == SCST_DATA_READ) ?
1810                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
1811         dma_len = sg_dma_len(&scat[0]);
1812         riu = ioctx->rdma_ius;
1813
1814         /*
1815          * For each remote descriptor, count how many ib_sge entries it
1816          * needs. If a descriptor fits within SRPT_DEF_SG_PER_WQE SGEs
1817          * per RDMA operation, a single rdma_iu (and hence a single RDMA
1818          * work request) covers it; otherwise extra rdma_iu entries are
1819          * allocated to carry the surplus SGEs in additional RDMA work
1820          * requests.
1821          */
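             /*
              * Worked example (hypothetical numbers): if SRPT_DEF_SG_PER_WQE
              * were 16 and one remote descriptor required 20 SGEs, the loop
              * below would allocate a first rdma_iu carrying 16 SGEs and a
              * second rdma_iu, starting at the updated remote address,
              * carrying the remaining 4 - i.e. two RDMA work requests for
              * that descriptor instead of one.
              */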
1822         for (i = 0, j = 0;
1823              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
1824                 rsize = be32_to_cpu(db->len);
1825                 raddr = be64_to_cpu(db->va);
1826                 riu->raddr = raddr;
1827                 riu->rkey = be32_to_cpu(db->key);
1828                 riu->sge_cnt = 0;
1829
1830                 /* Count how many SGEs this remote buffer requires. */
1831                 while (rsize > 0 && tsize > 0) {
1832
1833                         if (rsize >= dma_len) {
1834                                 tsize -= dma_len;
1835                                 rsize -= dma_len;
1836                                 raddr += dma_len;
1837
1838                                 if (tsize > 0) {
1839                                         ++j;
1840                                         if (j < count)
1841                                                 dma_len = sg_dma_len(&scat[j]);
1842                                 }
1843                         } else {
1844                                 tsize -= rsize;
1845                                 dma_len -= rsize;
1846                                 rsize = 0;
1847                         }
1848
1849                         ++riu->sge_cnt;
1850
1851                         if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) {
1852                                 riu->sge =
1853                                     kmalloc(riu->sge_cnt * sizeof *riu->sge,
1854                                             scst_cmd_atomic(scmnd)
1855                                             ? GFP_ATOMIC : GFP_KERNEL);
1856                                 if (!riu->sge)
1857                                         goto free_mem;
1858
1859                                 ++ioctx->n_rdma;
1860                                 ++riu;
1861                                 riu->sge_cnt = 0;
1862                                 riu->raddr = raddr;
1863                                 riu->rkey = be32_to_cpu(db->key);
1864                         }
1865                 }
1866
1867                 riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge,
1868                                    scst_cmd_atomic(scmnd)
1869                                    ? GFP_ATOMIC : GFP_KERNEL);
1870
1871                 if (!riu->sge)
1872                         goto free_mem;
1873
1874                 ++ioctx->n_rdma;
1875         }
1876
1877         db = ioctx->rbufs;
1878         scat = scst_cmd_get_sg(scmnd);
1879         tsize = (dir == SCST_DATA_READ) ?
1880                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
1881         riu = ioctx->rdma_ius;
1882         dma_len = sg_dma_len(&scat[0]);
1883         dma_addr = sg_dma_address(&scat[0]);
1884
1885         /* The second pass fills each rdma_iu's ib_sge array with the mapped DMA addresses. */
1886         for (i = 0, j = 0;
1887              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
1888                 rsize = be32_to_cpu(db->len);
1889                 sge = riu->sge;
1890                 k = 0;
1891
1892                 while (rsize > 0 && tsize > 0) {
1893                         sge->addr = dma_addr;
1894                         sge->lkey = ch->sport->sdev->mr->lkey;
1895
1896                         if (rsize >= dma_len) {
1897                                 sge->length =
1898                                         (tsize < dma_len) ? tsize : dma_len;
1899                                 tsize -= dma_len;
1900                                 rsize -= dma_len;
1901
1902                                 if (tsize > 0) {
1903                                         ++j;
1904                                         if (j < count) {
1905                                                 dma_len = sg_dma_len(&scat[j]);
1906                                                 dma_addr =
1907                                                     sg_dma_address(&scat[j]);
1908                                         }
1909                                 }
1910                         } else {
1911                                 sge->length = (tsize < rsize) ? tsize : rsize;
1912                                 tsize -= rsize;
1913                                 dma_len -= rsize;
1914                                 dma_addr += rsize;
1915                                 rsize = 0;
1916                         }
1917
1918                         ++k;
1919                         if (k == riu->sge_cnt && rsize > 0) {
1920                                 ++riu;
1921                                 sge = riu->sge;
1922                                 k = 0;
1923                         } else if (rsize > 0)
1924                                 ++sge;
1925                 }
1926         }
1927
1928         return 0;
1929
1930 free_mem:
1931         while (ioctx->n_rdma)
1932                 kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge); /* last valid entry is n_rdma - 1 */
1933
1934         kfree(ioctx->rdma_ius);
1935
1936         dma_unmap_sg(ch->sport->sdev->device->dma_device,
1937                      scat, scst_cmd_get_sg_cnt(scmnd),
1938                      scst_to_tgt_dma_dir(dir));
1939
1940         return -ENOMEM;
1941 }
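     /*
      * Design note: srpt_map_sg_to_ib_sge() deliberately walks the
      * scatterlist twice - the first pass only counts SGEs and allocates
      * each rdma_iu's ib_sge array, the second pass fills the arrays in -
      * because the size of each array is only known once the corresponding
      * stretch of the scatterlist has been walked.
      */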
1942
1943 static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
1944                               scst_data_direction dir)
1945 {
1946         struct ib_send_wr wr;
1947         struct ib_send_wr *bad_wr;
1948         struct rdma_iu *riu;
1949         int i;
1950         int ret = 0;
1951
1952         riu = ioctx->rdma_ius;
1953         memset(&wr, 0, sizeof wr);
1954
1955         for (i = 0; i < ioctx->n_rdma; ++i, ++riu) {
1956                 wr.opcode = (dir == SCST_DATA_READ) ?
1957                     IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
1958                 wr.next = NULL;
1959                 wr.wr_id = ioctx->index;
1960                 wr.wr.rdma.remote_addr = riu->raddr;
1961                 wr.wr.rdma.rkey = riu->rkey;
1962                 wr.num_sge = riu->sge_cnt;
1963                 wr.sg_list = riu->sge;
1964
1965                 /* Request a completion only for the last WR, and only for write commands; see the note below this function. */
1966                 if (i == (ioctx->n_rdma - 1) && dir == SCST_DATA_WRITE)
1967                         wr.send_flags = IB_SEND_SIGNALED;
1968
1969                 ret = ib_post_send(ch->qp, &wr, &bad_wr);
1970                 if (ret)
1971                         break;
1972         }
1973
1974         return ret;
1975 }
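     /*
      * Note on completion signaling in srpt_perform_rdmas(): work requests
      * on a queue pair execute in order, so signaling only the last RDMA
      * read of a write command is sufficient. For read commands none of the
      * RDMA writes is signaled at all; the SRP_RSP send posted afterwards is
      * ordered behind them and its completion implies they have finished.
      */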
1976
1977 /*
1978  * Start data reception. Must not block.
1979  */
1980 static int srpt_xfer_data(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
1981                           struct scst_cmd *scmnd)
1982 {
1983         int ret;
1984
1985         ret = srpt_map_sg_to_ib_sge(ch, ioctx, scmnd);
1986         if (ret) {
1987                 printk(KERN_ERR PFX "%s[%d] ret=%d\n", __func__, __LINE__, ret);
1988                 ret = SCST_TGT_RES_QUEUE_FULL;
1989                 goto out;
1990         }
1991
1992         ret = srpt_perform_rdmas(ch, ioctx, scst_cmd_get_data_direction(scmnd));
1993         if (ret) {
1994                 printk(KERN_ERR PFX "%s[%d] ret=%d\n", __func__, __LINE__, ret);
1995                 if (ret == -EAGAIN || ret == -ENOMEM)
1996                         ret = SCST_TGT_RES_QUEUE_FULL;
1997                 else
1998                         ret = SCST_TGT_RES_FATAL_ERROR;
1999                 goto out;
2000         }
2001
2002         ret = SCST_TGT_RES_SUCCESS;
2003
2004 out:
2005         return ret;
2006 }
2007
2008 /*
2009  * Called by the SCST core to inform ib_srpt that data reception should start.
2010  * Must not block.
2011  */
2012 static int srpt_rdy_to_xfer(struct scst_cmd *scmnd)
2013 {
2014         struct srpt_rdma_ch *ch;
2015         struct srpt_ioctx *ioctx;
2016
2017         ioctx = scst_cmd_get_tgt_priv(scmnd);
2018         BUG_ON(!ioctx);
2019
2020         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2021         BUG_ON(!ch);
2022
2023         if (ch->state == RDMA_CHANNEL_DISCONNECTING)
2024                 return SCST_TGT_RES_FATAL_ERROR;
2025         else if (ch->state == RDMA_CHANNEL_CONNECTING)
2026                 return SCST_TGT_RES_QUEUE_FULL;
2027
2028         return srpt_xfer_data(ch, ioctx, scmnd);
2029 }
2030
2031 /*
2032  * Called by the SCST core. Transmits the response buffer and status held in
2033  * 'scmnd'. Must not block.
2034  */
2035 static int srpt_xmit_response(struct scst_cmd *scmnd)
2036 {
2037         struct srpt_rdma_ch *ch;
2038         struct srpt_ioctx *ioctx;
2039         struct srp_rsp *srp_rsp;
2040         u64 tag;
2041         int ret = SCST_TGT_RES_SUCCESS;
2042         scst_data_direction dir;
2043         int status;
2044
2045         ioctx = scst_cmd_get_tgt_priv(scmnd);
2046         BUG_ON(!ioctx);
2047
2048         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2049         BUG_ON(!ch);
2050
2051         tag = scst_cmd_get_tag(scmnd);
2052
2053         if (ch->state != RDMA_CHANNEL_LIVE) {
2054                 printk(KERN_ERR PFX
2055                        "%s: tag= %lld channel in bad state %d\n",
2056                        __func__, (unsigned long long)tag, ch->state);
2057
2058                 if (ch->state == RDMA_CHANNEL_DISCONNECTING)
2059                         ret = SCST_TGT_RES_FATAL_ERROR;
2060                 else if (ch->state == RDMA_CHANNEL_CONNECTING)
2061                         ret = SCST_TGT_RES_QUEUE_FULL;
2062
2063                 if (unlikely(scst_cmd_aborted(scmnd)))
2064                         goto out_aborted;
2065
2066                 goto out;
2067         }
2068
2069         dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
2070                                 MAX_MESSAGE_SIZE, DMA_TO_DEVICE);
2071
2072         srp_rsp = ioctx->buf;
2073
2074         if (unlikely(scst_cmd_aborted(scmnd))) {
2075                 printk(KERN_ERR PFX
2076                        "%s: tag= %lld has already been aborted\n",
2077                        __func__, (unsigned long long)tag);
2078                 goto out_aborted;
2079         }
2080
2081         dir = scst_cmd_get_data_direction(scmnd);
2082         status = scst_cmd_get_status(scmnd) & 0xff;
2083
2084         srpt_build_cmd_rsp(ch, ioctx, NO_SENSE, NO_ADD_SENSE, tag);
2085
2086         if (SCST_SENSE_VALID(scst_cmd_get_sense_buffer(scmnd))) {
2087                 srp_rsp->sense_data_len = scst_cmd_get_sense_buffer_len(scmnd);
2088                 if (srp_rsp->sense_data_len >
2089                     (MAX_MESSAGE_SIZE - sizeof *srp_rsp))
2090                         srp_rsp->sense_data_len =
2091                             MAX_MESSAGE_SIZE - sizeof *srp_rsp;
2092
2093                 memcpy((u8 *) (srp_rsp + 1), scst_cmd_get_sense_buffer(scmnd),
2094                        srp_rsp->sense_data_len);
2095
2096                 srp_rsp->sense_data_len = cpu_to_be32(srp_rsp->sense_data_len);
2097                 srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
2098
2099                 if (!status)
2100                         status = SAM_STAT_CHECK_CONDITION;
2101         }
2102
2103         srp_rsp->status = status;
2104
2105         /* transfer read data if any */
2106         if (dir == SCST_DATA_READ && scst_cmd_get_resp_data_len(scmnd)) {
2107                 ret = srpt_xfer_data(ch, ioctx, scmnd);
2108                 if (ret != SCST_TGT_RES_SUCCESS) {
2109                         printk(KERN_ERR PFX
2110                                "%s: tag= %lld xfer_data failed\n",
2111                                __func__, (unsigned long long)tag);
2112                         goto out;
2113                 }
2114         }
2115
2116         if (srpt_post_send(ch, ioctx,
2117                            sizeof *srp_rsp +
2118                            be32_to_cpu(srp_rsp->sense_data_len))) {
2119                 printk(KERN_ERR PFX "%s: ch->state= %d tag= %lld\n",
2120                        __func__, ch->state,
2121                        (unsigned long long)tag);
2122                 ret = SCST_TGT_RES_FATAL_ERROR;
2123         }
2124
2125 out:
2126         return ret;
2127
2128 out_aborted:
2129         ret = SCST_TGT_RES_SUCCESS;
2130         scst_set_delivery_status(scmnd, SCST_CMD_DELIVERY_ABORTED);
2131         scst_tgt_cmd_done(scmnd, SCST_CONTEXT_SAME);
2132         goto out;
2133 }
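     /*
      * Wire format of the response built in srpt_xmit_response() (sketch):
      * any sense bytes are appended immediately after the fixed-size
      * header, which is why the length passed to srpt_post_send() is
      * sizeof *srp_rsp + be32_to_cpu(srp_rsp->sense_data_len):
      *
      *     +-----------------+-----------------------+
      *     | struct srp_rsp  | sense data (optional) |
      *     +-----------------+-----------------------+
      */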
2134
2135 /*
2136  * Called by the SCST core to inform ib_srpt that a received task management
2137  * function has been completed. Must not block.
2138  */
2139 static void srpt_tsk_mgmt_done(struct scst_mgmt_cmd *mcmnd)
2140 {
2141         struct srpt_rdma_ch *ch;
2142         struct srpt_mgmt_ioctx *mgmt_ioctx;
2143         struct srpt_ioctx *ioctx;
2144
2145         mgmt_ioctx = scst_mgmt_cmd_get_tgt_priv(mcmnd);
2146         BUG_ON(!mgmt_ioctx);
2147
2148         ch = mgmt_ioctx->ch;
2149         BUG_ON(!ch);
2150
2151         ioctx = mgmt_ioctx->ioctx;
2152         BUG_ON(!ioctx);
2153
2154         printk(KERN_WARNING PFX
2155                "%s: tsk_mgmt_done for tag= %lld status=%d\n",
2156                __func__, (unsigned long long)mgmt_ioctx->tag,
2157                scst_mgmt_cmd_get_status(mcmnd));
2158
2159         srpt_build_tskmgmt_rsp(ch, ioctx,
2160                                (scst_mgmt_cmd_get_status(mcmnd) ==
2161                                 SCST_MGMT_STATUS_SUCCESS) ?
2162                                SRP_TSK_MGMT_SUCCESS : SRP_TSK_MGMT_FAILED,
2163                                mgmt_ioctx->tag);
2164         srpt_post_send(ch, ioctx, sizeof(struct srp_rsp) + 4);
2165
2166         scst_mgmt_cmd_set_tgt_priv(mcmnd, NULL);
2167
2168         kfree(mgmt_ioctx);
2169 }
2170
2171 /*
2172  * Called by the SCST core to inform ib_srpt that the command 'scmnd' is about
2173  * to be freed. May be called in IRQ context.
2174  */
2175 static void srpt_on_free_cmd(struct scst_cmd *scmnd)
2176 {
2177         struct srpt_rdma_ch *ch;
2178         struct srpt_ioctx *ioctx;
2179
2180         ioctx = scst_cmd_get_tgt_priv(scmnd);
2181         BUG_ON(!ioctx);
2182
2183         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2184         BUG_ON(!ch);
2185
2186         spin_lock_irq(&ch->spinlock);
2187         list_del(&ioctx->scmnd_list);
2188         ch->active_scmnd_cnt--;
2189         spin_unlock_irq(&ch->spinlock);
2190
2191         srpt_reset_ioctx(ch, ioctx);
2192         scst_cmd_set_tgt_priv(scmnd, NULL);
2193 }
2194
2195 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2196 /* A vanilla 2.6.19 or older kernel without backported OFED kernel headers. */
2197 static void srpt_refresh_port_work(void *ctx)
2198 #else
2199 static void srpt_refresh_port_work(struct work_struct *work)
2200 #endif
2201 {
2202 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2203         struct srpt_port *sport = (struct srpt_port *)ctx;
2204 #else
2205         struct srpt_port *sport = container_of(work, struct srpt_port, work);
2206 #endif
2207
2208         srpt_refresh_port(sport);
2209 }
2210
2211 /*
2212  * Called by the SCST core to detect target adapters. Returns the number of
2213  * detected target adapters.
2214  */
2215 static int srpt_detect(struct scst_tgt_template *tp)
2216 {
2217         struct srpt_device *sdev;
2218         int count = 0;
2219
2220         TRACE_ENTRY();
2221
2222         list_for_each_entry(sdev, &srpt_devices, list)
2223                 ++count;
2224
2225         TRACE_EXIT();
2226
2227         return count;
2228 }
2229
2230 /*
2231  * Callback function called by the SCST core from scst_unregister() to free up
2232  * the resources associated with device scst_tgt.
2233  */
2234 static int srpt_release(struct scst_tgt *scst_tgt)
2235 {
2236         struct srpt_device *sdev = scst_tgt_get_tgt_priv(scst_tgt);
2237         struct srpt_rdma_ch *ch, *tmp_ch;
2238
2239         TRACE_ENTRY();
2240
2241         BUG_ON(!scst_tgt);
2242 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
2243         WARN_ON(!sdev);
2244         if (!sdev)
2245                 return -ENODEV;
2246 #else
2247         if (WARN_ON(!sdev))
2248                 return -ENODEV;
2249 #endif
2250
2251         srpt_unregister_procfs_entry(scst_tgt->tgtt);
2252
2253         list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list)
2254             srpt_release_channel(ch, 1);
2255
2256         srpt_unregister_mad_agent(sdev);
2257
2258         scst_tgt_set_tgt_priv(scst_tgt, NULL);
2259
2260         TRACE_EXIT();
2261
2262         return 0;
2263 }
2264
2265 /*
2266  * Entry point for ib_srpt's kernel thread. This kernel thread is only created
2267  * when the module parameter 'thread' is not zero (the default is zero).
2268  * This thread processes the ioctx list srpt_thread.thread_ioctx_list.
2269  *
2270  * @pre thread != 0
2271  */
2272 static int srpt_ioctx_thread(void *arg)
2273 {
2274         struct srpt_ioctx *ioctx;
2275
2276         /* Hibernation / freezing of the SRPT kernel thread is not supported. */
2277         current->flags |= PF_NOFREEZE;
2278
2279         spin_lock_irq(&srpt_thread.thread_lock);
2280         while (!kthread_should_stop()) {
2281                 wait_queue_t wait;
2282                 init_waitqueue_entry(&wait, current);
2283
2284                 if (!srpt_test_ioctx_list()) {
2285                         add_wait_queue_exclusive(&ioctx_list_waitQ, &wait);
2286
2287                         for (;;) {
2288                                 set_current_state(TASK_INTERRUPTIBLE);
2289                                 if (srpt_test_ioctx_list())
2290                                         break;
2291                                 spin_unlock_irq(&srpt_thread.thread_lock);
2292                                 schedule();
2293                                 spin_lock_irq(&srpt_thread.thread_lock);
2294                         }
2295                         set_current_state(TASK_RUNNING);
2296                         remove_wait_queue(&ioctx_list_waitQ, &wait);
2297                 }
2298
2299                 while (!list_empty(&srpt_thread.thread_ioctx_list)) {
2300                         ioctx = list_entry(srpt_thread.thread_ioctx_list.next,
2301                                            struct srpt_ioctx, comp_list);
2302
2303                         list_del(&ioctx->comp_list);
2304
2305                         spin_unlock_irq(&srpt_thread.thread_lock);
2306                         switch (ioctx->op) {
2307                         case IB_WC_SEND:
2308                                 srpt_handle_send_comp(ioctx->ch, ioctx,
2309                                         SCST_CONTEXT_DIRECT);
2310                                 break;
2311                         case IB_WC_RDMA_WRITE:
2312                         case IB_WC_RDMA_READ:
2313                                 srpt_handle_rdma_comp(ioctx->ch, ioctx);
2314                                 break;
2315                         case IB_WC_RECV:
2316                                 srpt_handle_new_iu(ioctx->ch, ioctx);
2317                                 break;
2318                         default:
2319                                 break;
2320                         }
2321                         spin_lock_irq(&srpt_thread.thread_lock);
2322                 }
2323         }
2324         spin_unlock_irq(&srpt_thread.thread_lock);
2325
2326         return 0;
2327 }
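     /*
      * Design note: the open-coded wait loop in srpt_ioctx_thread() is the
      * classic expansion of wait_event_interruptible(), spelled out by hand
      * here (presumably) because thread_lock has to be dropped around
      * schedule() and re-acquired before the ioctx list is inspected again.
      */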
2328
2329 /* SCST target template for the SRP target implementation. */
2330 static struct scst_tgt_template srpt_template = {
2331         .name = DRV_NAME,
2332         .sg_tablesize = SRPT_DEF_SG_TABLESIZE,
2333         .xmit_response_atomic = 1,
2334         .rdy_to_xfer_atomic = 1,
2335         .no_proc_entry = 0,
2336         .detect = srpt_detect,
2337         .release = srpt_release,
2338         .xmit_response = srpt_xmit_response,
2339         .rdy_to_xfer = srpt_rdy_to_xfer,
2340         .on_free_cmd = srpt_on_free_cmd,
2341         .task_mgmt_fn_done = srpt_tsk_mgmt_done
2342 };
2343
2344 /*
2345  * The callback function srpt_release_class_dev() is called whenever a
2346  * device is removed from the /sys/class/infiniband_srpt device class.
2347  */
2348 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2349 static void srpt_release_class_dev(struct class_device *class_dev)
2350 #else
2351 static void srpt_release_class_dev(struct device *dev)
2352 #endif
2353 {
2354 }
2355
2356 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
2357 static int srpt_trace_level_show(struct seq_file *seq, void *v)
2358 {
2359         return scst_proc_log_entry_read(seq, trace_flag, NULL);
2360 }
2361
2362 static ssize_t srpt_proc_trace_level_write(struct file *file,
2363         const char __user *buf, size_t length, loff_t *off)
2364 {
2365         return scst_proc_log_entry_write(file, buf, length, &trace_flag,
2366                 DEFAULT_SRPT_TRACE_FLAGS, NULL);
2367 }
2368
2369 static struct scst_proc_data srpt_log_proc_data = {
2370         SCST_DEF_RW_SEQ_OP(srpt_proc_trace_level_write)
2371         .show = srpt_trace_level_show,
2372 };
2373 #endif
2374
2375 static struct class_attribute srpt_class_attrs[] = {
2376         __ATTR_NULL,
2377 };
2378
2379 static struct class srpt_class = {
2380         .name = "infiniband_srpt",
2381 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2382         .release = srpt_release_class_dev,
2383 #else
2384         .dev_release = srpt_release_class_dev,
2385 #endif
2386         .class_attrs = srpt_class_attrs,
2387 };
2388
2389 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2390 static ssize_t show_login_info(struct class_device *class_dev, char *buf)
2391 #else
2392 static ssize_t show_login_info(struct device *dev,
2393                                struct device_attribute *attr, char *buf)
2394 #endif
2395 {
2396         struct srpt_device *sdev =
2397 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2398                 container_of(class_dev, struct srpt_device, class_dev);
2399 #else
2400                 container_of(dev, struct srpt_device, dev);
2401 #endif
2402         struct srpt_port *sport;
2403         int i;
2404         int len = 0;
2405
2406         for (i = 0; i < sdev->device->phys_port_cnt; i++) {
2407                 sport = &sdev->port[i];
2408
2409                 len += sprintf(buf + len,
2410                                "tid_ext=%016llx,ioc_guid=%016llx,pkey=ffff,"
2411                                "dgid=%04x%04x%04x%04x%04x%04x%04x%04x,"
2412                                "service_id=%016llx\n",
2413                                (unsigned long long) mellanox_ioc_guid,
2414                                (unsigned long long) mellanox_ioc_guid,
2415                                be16_to_cpu(((__be16 *) sport->gid.raw)[0]),
2416                                be16_to_cpu(((__be16 *) sport->gid.raw)[1]),
2417                                be16_to_cpu(((__be16 *) sport->gid.raw)[2]),
2418                                be16_to_cpu(((__be16 *) sport->gid.raw)[3]),
2419                                be16_to_cpu(((__be16 *) sport->gid.raw)[4]),
2420                                be16_to_cpu(((__be16 *) sport->gid.raw)[5]),
2421                                be16_to_cpu(((__be16 *) sport->gid.raw)[6]),
2422                                be16_to_cpu(((__be16 *) sport->gid.raw)[7]),
2423                                (unsigned long long) mellanox_ioc_guid);
2424         }
2425
2426         return len;
2427 }
2428
2429 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2430 static CLASS_DEVICE_ATTR(login_info, S_IRUGO, show_login_info, NULL);
2431 #else
2432 static DEVICE_ATTR(login_info, S_IRUGO, show_login_info, NULL);
2433 #endif
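     /*
      * Usage sketch (device name hypothetical): the login_info attribute
      * can be read from sysfs to obtain the parameters an SRP initiator
      * needs, e.g.:
      *
      *     $ cat /sys/class/infiniband_srpt/srpt-mthca0/login_info
      *     tid_ext=...,ioc_guid=...,pkey=ffff,dgid=...,service_id=...
      */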
2434
2435 /*
2436  * Callback function invoked by the InfiniBand core when an InfiniBand
2437  * device is added, and once for each already-registered device during
2438  * the ib_register_client() call.
2439  */
2440 static void srpt_add_one(struct ib_device *device)
2441 {
2442         struct srpt_device *sdev;
2443         struct srpt_port *sport;
2444         struct ib_srq_init_attr srq_attr;
2445         int i;
2446
2447         TRACE_ENTRY();
2448
2449         sdev = kzalloc(sizeof *sdev, GFP_KERNEL);
2450         if (!sdev)
2451                 return;
2452
2453         sdev->device = device;
2454
2455 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2456         sdev->class_dev.class = &srpt_class;
2457         sdev->class_dev.dev = device->dma_device;
2458         snprintf(sdev->class_dev.class_id, BUS_ID_SIZE,
2459                  "srpt-%s", device->name);
2460 #else
2461         sdev->dev.class = &srpt_class;
2462         sdev->dev.parent = device->dma_device;
2463 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30)
2464         snprintf(sdev->dev.bus_id, BUS_ID_SIZE, "srpt-%s", device->name);
2465 #else
2466         snprintf(sdev->init_name, sizeof(sdev->init_name),
2467                  "srpt-%s", device->name);
2468         sdev->dev.init_name = sdev->init_name;
2469 #endif
2470 #endif
2471
2472 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2473         if (class_device_register(&sdev->class_dev))
2474                 goto free_dev;
2475         if (class_device_create_file(&sdev->class_dev,
2476                                      &class_device_attr_login_info))
2477                 goto err_dev;
2478 #else
2479         if (device_register(&sdev->dev))
2480                 goto free_dev;
2481         if (device_create_file(&sdev->dev, &dev_attr_login_info))
2482                 goto err_dev;
2483 #endif
2484
2485         if (ib_query_device(device, &sdev->dev_attr))
2486                 goto err_dev;
2487
2488         sdev->pd = ib_alloc_pd(device);
2489         if (IS_ERR(sdev->pd))
2490                 goto err_dev;
2491
2492         sdev->mr = ib_get_dma_mr(sdev->pd, IB_ACCESS_LOCAL_WRITE);
2493         if (IS_ERR(sdev->mr))
2494                 goto err_pd;
2495
2496         srq_attr.event_handler = srpt_srq_event;
2497         srq_attr.srq_context = (void *)sdev;
2498         srq_attr.attr.max_wr = min(SRPT_SRQ_SIZE, sdev->dev_attr.max_srq_wr);
2499         srq_attr.attr.max_sge = 1;
2500         srq_attr.attr.srq_limit = 0;
2501
2502         sdev->srq = ib_create_srq(sdev->pd, &srq_attr);
2503         if (IS_ERR(sdev->srq))
2504                 goto err_mr;
2505
2506         printk(KERN_DEBUG PFX "%s: created SRQ with %d WRs (device max %d) on %s\n",
2507                __func__, srq_attr.attr.max_wr,
2508               sdev->dev_attr.max_srq_wr, device->name);
2509
2510         if (!mellanox_ioc_guid)
2511                 mellanox_ioc_guid = be64_to_cpu(device->node_guid);
2512
2513         sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev);
2514         if (IS_ERR(sdev->cm_id))
2515                 goto err_srq;
2516
2517         /* print out target login information */
2518         printk(KERN_DEBUG PFX "Target login info: id_ext=%016llx,"
2519                 "ioc_guid=%016llx,pkey=ffff,service_id=%016llx\n",
2520                 (unsigned long long) mellanox_ioc_guid,
2521                 (unsigned long long) mellanox_ioc_guid,
2522                 (unsigned long long) mellanox_ioc_guid);
2523
2524         /*
2525          * We do not have a consistent service_id (i.e. the id_ext of the
2526          * target_id) to identify this target. We currently use the GUID of
2527          * the first HCA in the system as service_id; hence the target_id
2528          * will change if this HCA goes bad and is replaced by another one.
2529          */
2530         if (ib_cm_listen(sdev->cm_id, cpu_to_be64(mellanox_ioc_guid), 0, NULL))
2531                 goto err_cm;
2532
2533         INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device,
2534                               srpt_event_handler);
2535         if (ib_register_event_handler(&sdev->event_handler))
2536                 goto err_cm;
2537
2538         if (srpt_alloc_ioctx_ring(sdev))
2539                 goto err_event;
2540
2541         INIT_LIST_HEAD(&sdev->rch_list);
2542         spin_lock_init(&sdev->spinlock);
2543
2544         for (i = 0; i < SRPT_SRQ_SIZE; ++i)
2545                 srpt_post_recv(sdev, sdev->ioctx_ring[i]);
2546
2547         list_add_tail(&sdev->list, &srpt_devices);
2548
2549         ib_set_client_data(device, &srpt_client, sdev);
2550
2551         sdev->scst_tgt = scst_register(&srpt_template, NULL);
2552         if (!sdev->scst_tgt) {
2553                 printk(KERN_ERR PFX "SCST registration failed for %s.\n",
2554                         sdev->device->name);
2555                 goto err_ring;
2556         }
2557
2558         scst_tgt_set_tgt_priv(sdev->scst_tgt, sdev);
2559
2560         for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
2561                 sport = &sdev->port[i - 1];
2562                 sport->sdev = sdev;
2563                 sport->port = i;
2564 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2565                 /*
2566                  * A vanilla 2.6.19 or older kernel without backported OFED
2567                  * kernel headers.
2568                  */
2569                 INIT_WORK(&sport->work, srpt_refresh_port_work, sport);
2570 #else
2571                 INIT_WORK(&sport->work, srpt_refresh_port_work);
2572 #endif
2573                 if (srpt_refresh_port(sport)) {
2574                         printk(KERN_ERR PFX "MAD registration failed"
2575                                " for %s-%d.\n", sdev->device->name, i);
2576                         goto err_refresh_port;
2577                 }
2578         }
2579
2580         TRACE_EXIT();
2581
2582         return;
2583
2584 err_refresh_port:
2585         scst_unregister(sdev->scst_tgt);
2586 err_ring:
2587         ib_set_client_data(device, &srpt_client, NULL);
2588         list_del(&sdev->list);
2589         srpt_free_ioctx_ring(sdev);
2590 err_event:
2591         ib_unregister_event_handler(&sdev->event_handler);
2592 err_cm:
2593         ib_destroy_cm_id(sdev->cm_id);
2594 err_srq:
2595         ib_destroy_srq(sdev->srq);
2596 err_mr:
2597         ib_dereg_mr(sdev->mr);
2598 err_pd:
2599         ib_dealloc_pd(sdev->pd);
2600 err_dev:
2601 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2602         class_device_unregister(&sdev->class_dev);
2603 #else
2604         device_unregister(&sdev->dev);
2605 #endif
2606 free_dev:
2607         kfree(sdev);
2608
2609         TRACE_EXIT();
2610 }
2611
2612 /*
2613  * Callback function invoked by the InfiniBand core when an InfiniBand
2614  * device is removed, and once for each registered device during the
2615  * ib_unregister_client() call.
2616  */
2617 static void srpt_remove_one(struct ib_device *device)
2618 {
2619         int i;
2620         struct srpt_device *sdev;
2621
2622         TRACE_ENTRY();
2623
2624         sdev = ib_get_client_data(device, &srpt_client);
2625 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
2626         WARN_ON(!sdev);
2627         if (!sdev)
2628                 return;
2629 #else
2630         if (WARN_ON(!sdev))
2631                 return;
2632 #endif
2633
2634         /*
2635          * Cancel the work if it is queued. If srpt_refresh_port_work() is
2636          * running, wait until it has finished.
2637          */
2638         for (i = 0; i < sdev->device->phys_port_cnt; i++)
2639                 cancel_work_sync(&sdev->port[i].work);
2640
2641         scst_unregister(sdev->scst_tgt);
2642         sdev->scst_tgt = NULL;
2643
2644         ib_unregister_event_handler(&sdev->event_handler);
2645         ib_destroy_cm_id(sdev->cm_id);
2646         ib_destroy_srq(sdev->srq);
2647         ib_dereg_mr(sdev->mr);
2648         ib_dealloc_pd(sdev->pd);
2649 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2650         class_device_unregister(&sdev->class_dev);
2651 #else
2652         device_unregister(&sdev->dev);
2653 #endif
2654
2655         srpt_free_ioctx_ring(sdev);
2656         list_del(&sdev->list);
2657         kfree(sdev);
2658
2659         TRACE_EXIT();
2660 }
2661
2662 /**
2663  * Create procfs entries for srpt. Currently the only procfs entry created
2664  * by this function is the "trace_level" entry.
2665  */
2666 static int srpt_register_procfs_entry(struct scst_tgt_template *tgt)
2667 {
2668         int res = 0;
2669 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
2670         struct proc_dir_entry *p, *root;
2671
2672         root = scst_proc_get_tgt_root(tgt);
2673         WARN_ON(!root);
2674         if (root) {
2675                 /*
2676                  * Fill in the scst_proc_data::data pointer, which is used in
2677                  * a printk(KERN_INFO ...) statement in
2678                  * scst_proc_log_entry_write() in scst_proc.c.
2679                  */
2680                 srpt_log_proc_data.data = (char *)tgt->name;
2681                 p = scst_create_proc_entry(root, SRPT_PROC_TRACE_LEVEL_NAME,
2682                                            &srpt_log_proc_data);
2683                 if (!p)
2684                         res = -ENOMEM;
2685         } else
2686                 res = -ENOMEM;
2687
2688 #endif
2689         return res;
2690 }
2691
2692 static void srpt_unregister_procfs_entry(struct scst_tgt_template *tgt)
2693 {
2694 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
2695         struct proc_dir_entry *root;
2696
2697         root = scst_proc_get_tgt_root(tgt);
2698         WARN_ON(!root);
2699         if (root)
2700                 remove_proc_entry(SRPT_PROC_TRACE_LEVEL_NAME, root);
2701 #endif
2702 }
2703
2704 /*
2705  * Module initialization.
2706  *
2707  * Note: since ib_register_client() registers callback functions, and since at
2708  * least one of these callback functions (srpt_add_one()) calls SCST functions,
2709  * the SCST target template must be registered before ib_register_client() is
2710  * called.
2711  */
2712 static int __init srpt_init_module(void)
2713 {
2714         int ret;
2715
2716         INIT_LIST_HEAD(&srpt_devices);
2717
2718         ret = class_register(&srpt_class);
2719         if (ret) {
2720                 printk(KERN_ERR PFX "couldn't register class ib_srpt\n");
2721                 goto out;
2722         }
2723
2724         ret = scst_register_target_template(&srpt_template);
2725         if (ret < 0) {
2726                 printk(KERN_ERR PFX "couldn't register with scst\n");
2727                 ret = -ENODEV;
2728                 goto out_unregister_class;
2729         }
2730
2731         ret = srpt_register_procfs_entry(&srpt_template);
2732         if (ret) {
2733                 printk(KERN_ERR PFX "couldn't register procfs entry\n");
2734                 goto out_unregister_target;
2735         }
2736
2737         ret = ib_register_client(&srpt_client);
2738         if (ret) {
2739                 printk(KERN_ERR PFX "couldn't register IB client\n");
2740                 goto out_unregister_target;
2741         }
2742
2743         if (thread) {
2744                 spin_lock_init(&srpt_thread.thread_lock);
2745                 INIT_LIST_HEAD(&srpt_thread.thread_ioctx_list);
2746                 srpt_thread.thread = kthread_run(srpt_ioctx_thread,
2747                                                  NULL, "srpt_thread");
2748                 if (IS_ERR(srpt_thread.thread)) {
2749                         srpt_thread.thread = NULL;
2750                         thread = 0;
2751                 }
2752         }
2753
2754         return 0;
2755
2756 out_unregister_target:
2757         /*
2758          * Note: the procfs entry is unregistered in srpt_release(), which is
2759          * called by scst_unregister_target_template().
2760          */
2761         scst_unregister_target_template(&srpt_template);
2762 out_unregister_class:
2763         class_unregister(&srpt_class);
2764 out:
2765         return ret;
2766 }
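     /*
      * Usage sketch: loading the driver with its optional completion-handling
      * kernel thread enabled via the 'thread' module parameter:
      *
      *     modprobe ib_srpt thread=1
      */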
2767
2768 static void __exit srpt_cleanup_module(void)
2769 {
2770         TRACE_ENTRY();
2771
2772         if (srpt_thread.thread)
2773                 kthread_stop(srpt_thread.thread);
2774         ib_unregister_client(&srpt_client);
2775         scst_unregister_target_template(&srpt_template);
2776         class_unregister(&srpt_class);
2777
2778         TRACE_EXIT();
2779 }
2780
2781 module_init(srpt_init_module);
2782 module_exit(srpt_cleanup_module);