/*
 * Copyright (c) 2006 - 2009 Mellanox Technology Inc.  All rights reserved.
 * Copyright (C) 2008 Vladislav Bolkhovitin <vst@vlnb.net>
 * Copyright (C) 2008 - 2009 Bart Van Assche <bart.vanassche@gmail.com>
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/kthread.h>
#include <asm/atomic.h>
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#endif
#include "ib_srpt.h"
#include "scst_debug.h"

/* Name of this kernel module. */
#define DRV_NAME                "ib_srpt"
/* Prefix for printk() kernel messages. */
#define PFX                     DRV_NAME ": "
#define DRV_VERSION             "1.0.1"
#define DRV_RELDATE             "July 10, 2008"
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
/* Flags to be used in SCST debug tracing statements. */
#define DEFAULT_SRPT_TRACE_FLAGS (TRACE_OUT_OF_MEM | TRACE_MINOR \
                                  | TRACE_MGMT | TRACE_SPECIAL)
/* Name of the entry that will be created under /proc/scsi_tgt/ib_srpt. */
#define SRPT_PROC_TRACE_LEVEL_NAME      "trace_level"
#endif

#define MELLANOX_SRPT_ID_STRING "Mellanox OFED SRP target"

MODULE_AUTHOR("Vu Pham");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol target "
                   "v" DRV_VERSION " (" DRV_RELDATE ")");
MODULE_LICENSE("Dual BSD/GPL");

struct srpt_thread {
        /* Protects thread_ioctx_list. */
        spinlock_t thread_lock;
        /* I/O contexts to be processed by the kernel thread. */
        struct list_head thread_ioctx_list;
        /* SRPT kernel thread. */
        struct task_struct *thread;
};

/*
 * Global Variables
 */

static u64 mellanox_ioc_guid;
/* List of srpt_device structures. */
static struct list_head srpt_devices;
static int thread;
static struct srpt_thread srpt_thread;
static DECLARE_WAIT_QUEUE_HEAD(ioctx_list_waitQ);
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
static unsigned long trace_flag = DEFAULT_SRPT_TRACE_FLAGS;
module_param(trace_flag, long, 0644);
MODULE_PARM_DESC(trace_flag,
                 "Trace flags for the ib_srpt kernel module.");
#endif

module_param(thread, int, 0444);
MODULE_PARM_DESC(thread,
                 "Process I/O contexts in kernel thread context instead of"
                 " soft IRQ context where possible (default: 0, i.e. soft IRQ).");

static void srpt_add_one(struct ib_device *device);
static void srpt_remove_one(struct ib_device *device);
static int srpt_disconnect_channel(struct srpt_rdma_ch *ch, int dreq);
static void srpt_unregister_mad_agent(struct srpt_device *sdev);
static void srpt_unregister_procfs_entry(struct scst_tgt_template *tgt);

static struct ib_client srpt_client = {
        .name = DRV_NAME,
        .add = srpt_add_one,
        .remove = srpt_remove_one
};

/*
 * Callback function called by the InfiniBand core when an asynchronous IB
 * event occurs. This callback may occur in interrupt context. See also
 * section 11.5.2, Set Asynchronous Event Handler in the InfiniBand
 * Architecture Specification.
 */
static void srpt_event_handler(struct ib_event_handler *handler,
                               struct ib_event *event)
{
        struct srpt_device *sdev =
            ib_get_client_data(event->device, &srpt_client);
        struct srpt_port *sport;

        if (!sdev || sdev->device != event->device)
                return;

        printk(KERN_WARNING PFX "ASYNC event= %d on device= %s\n",
                event->event, sdev->device->name);

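        /*
         * Note: event->element.port_num is one-based while the sdev->port[]
         * array is zero-based, hence the "- 1" below.
         */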
        switch (event->event) {
        case IB_EVENT_PORT_ERR:
                if (event->element.port_num <= sdev->device->phys_port_cnt) {
                        sport = &sdev->port[event->element.port_num - 1];
                        sport->lid = 0;
                        sport->sm_lid = 0;
                }
                break;
        case IB_EVENT_PORT_ACTIVE:
        case IB_EVENT_LID_CHANGE:
        case IB_EVENT_PKEY_CHANGE:
        case IB_EVENT_SM_CHANGE:
        case IB_EVENT_CLIENT_REREGISTER:
                /*
                 * Refresh port data asynchronously. Note: it is safe to call
                 * schedule_work() even if &sport->work is already on the
                 * global workqueue because schedule_work() tests for the
                 * work_pending() condition before adding &sport->work to the
                 * global work queue.
                 */
                if (event->element.port_num <= sdev->device->phys_port_cnt) {
                        sport = &sdev->port[event->element.port_num - 1];
                        if (!sport->lid && !sport->sm_lid)
                                schedule_work(&sport->work);
                }
                break;
        default:
                break;
        }
}

/*
 * Callback function called by the InfiniBand core for SRQ (shared receive
 * queue) events.
 */
static void srpt_srq_event(struct ib_event *event, void *ctx)
{
        printk(KERN_WARNING PFX "SRQ event %d\n", event->event);
}

/*
 * Callback function called by the InfiniBand core for QP (queue pair) events.
 */
static void srpt_qp_event(struct ib_event *event, void *ctx)
{
        struct srpt_rdma_ch *ch = ctx;

        printk(KERN_WARNING PFX
               "QP event %d on cm_id=%p sess_name=%s state=%d\n",
               event->event, ch->cm_id, ch->sess_name, ch->state);

        switch (event->event) {
        case IB_EVENT_COMM_EST:
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20) || defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
                ib_cm_notify(ch->cm_id, event->event);
#else
                /* Vanilla 2.6.19 kernel (or before) without OFED. */
                printk(KERN_ERR PFX "ib_cm_notify() is not available on"
                        " vanilla kernels older than 2.6.20\n");
#endif
                break;
        case IB_EVENT_QP_LAST_WQE_REACHED:
                if (ch->state == RDMA_CHANNEL_LIVE) {
                        printk(KERN_WARNING PFX
                               "Schedule CM_DISCONNECT_WORK\n");
                        srpt_disconnect_channel(ch, 1);
                }
                break;
        default:
                break;
        }
}

/*
 * Helper function for filling in an InfiniBand IOUnitInfo structure. Copies
 * the lowest four bits of value into element slot of the array of four-bit
 * elements called c_list (controller list). The index slot is one-based.
 *
 * @pre 1 <= slot && 0 <= value && value < 16
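 *
 * Example: slot 1 occupies the high nibble of c_list[0], slot 2 the low
 * nibble of c_list[0], slot 3 the high nibble of c_list[1], and so on.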
 */
static void srpt_set_ioc(u8 *c_list, u32 slot, u8 value)
{
        u16 id;
        u8 tmp;

        id = (slot - 1) / 2;
        if (slot & 0x1) {
                tmp = c_list[id] & 0xf;
                c_list[id] = (value << 4) | tmp;
        } else {
                tmp = c_list[id] & 0xf0;
                c_list[id] = (value & 0xf) | tmp;
        }
}

/*
 * Write InfiniBand ClassPortInfo to mad. See also section 16.3.3.1
 * ClassPortInfo in the InfiniBand Architecture Specification.
 */
static void srpt_get_class_port_info(struct ib_dm_mad *mad)
{
        struct ib_class_port_info *cif;

        cif = (struct ib_class_port_info *)mad->data;
        memset(cif, 0, sizeof *cif);
        cif->base_version = 1;
        cif->class_version = 1;
        cif->resp_time_value = 20;

        mad->mad_hdr.status = 0;
}

/*
 * Write IOUnitInfo to mad. See also section 16.3.3.3 IOUnitInfo in the
 * InfiniBand Architecture Specification. See also section B.7,
 * table B.6 in the T10 SRP r16a document.
 */
static void srpt_get_iou(struct ib_dm_mad *mad)
{
        struct ib_dm_iou_info *ioui;
        u8 slot;
        int i;

        ioui = (struct ib_dm_iou_info *)mad->data;
        ioui->change_id = 1;
        ioui->max_controllers = 16;

        /* Set present for slot 1 and empty for the rest. */
        srpt_set_ioc(ioui->controller_list, 1, 1);
        for (i = 1, slot = 2; i < 16; i++, slot++)
                srpt_set_ioc(ioui->controller_list, slot, 0);

        mad->mad_hdr.status = 0;
}

/*
 * Write IOControllerProfile to mad for I/O controller (sdev, slot). See also
 * section 16.3.3.4 IOControllerProfile in the InfiniBand Architecture
 * Specification. See also section B.7, table B.7 in the T10 SRP r16a
 * document.
 */
static void srpt_get_ioc(struct srpt_device *sdev, u32 slot,
                         struct ib_dm_mad *mad)
{
        struct ib_dm_ioc_profile *iocp;

        iocp = (struct ib_dm_ioc_profile *)mad->data;

        if (!slot || slot > 16) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
                return;
        }

        if (slot > 2) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
                return;
        }

        memset(iocp, 0, sizeof *iocp);
        strcpy(iocp->id_string, MELLANOX_SRPT_ID_STRING);
        iocp->guid = cpu_to_be64(mellanox_ioc_guid);
        iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
        iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id);
        iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver);
        iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
        iocp->subsys_device_id = 0x0;
        iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
        iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
        iocp->protocol = cpu_to_be16(SRP_PROTOCOL);
        iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION);
        iocp->send_queue_depth = cpu_to_be16(SRPT_SRQ_SIZE);
        iocp->rdma_read_depth = 4;
        iocp->send_size = cpu_to_be32(MAX_MESSAGE_SIZE);
        iocp->rdma_size = cpu_to_be32(MAX_RDMA_SIZE);
        iocp->num_svc_entries = 1;
        iocp->op_cap_mask = SRP_SEND_TO_IOC | SRP_SEND_FROM_IOC |
            SRP_RDMA_READ_FROM_IOC | SRP_RDMA_WRITE_FROM_IOC;

        mad->mad_hdr.status = 0;
}

/*
 * Device management: write ServiceEntries to mad for the given slot. See also
 * section 16.3.3.5 ServiceEntries in the InfiniBand Architecture
 * Specification. See also section B.7, table B.8 in the T10 SRP r16a document.
 */
static void srpt_get_svc_entries(u16 slot, u8 hi, u8 lo, struct ib_dm_mad *mad)
{
        struct ib_dm_svc_entries *svc_entries;

        if (!slot || slot > 16) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
                return;
        }

        if (slot > 2 || lo > hi || hi > 1) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
                return;
        }

        svc_entries = (struct ib_dm_svc_entries *)mad->data;
        memset(svc_entries, 0, sizeof *svc_entries);
        svc_entries->service_entries[0].id = cpu_to_be64(mellanox_ioc_guid);
        snprintf(svc_entries->service_entries[0].name,
                 sizeof(svc_entries->service_entries[0].name),
                 "%s%016llx",
                 SRP_SERVICE_NAME_PREFIX,
                 (unsigned long long)mellanox_ioc_guid);

        mad->mad_hdr.status = 0;
}

/*
 * Actual processing of MAD *rq_mad, received through source port *sp
 * (MAD = InfiniBand management datagram). The response to be sent back is
 * written to *rsp_mad.
 */
static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
                                 struct ib_dm_mad *rsp_mad)
{
        u16 attr_id;
        u32 slot;
        u8 hi, lo;

        attr_id = be16_to_cpu(rq_mad->mad_hdr.attr_id);
        switch (attr_id) {
        case DM_ATTR_CLASS_PORT_INFO:
                srpt_get_class_port_info(rsp_mad);
                break;
        case DM_ATTR_IOU_INFO:
                srpt_get_iou(rsp_mad);
                break;
        case DM_ATTR_IOC_PROFILE:
                slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
                srpt_get_ioc(sp->sdev, slot, rsp_mad);
                break;
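        /*
         * The attribute modifier of a ServiceEntries request encodes the
         * controller slot in bits 31:16 and the highest and lowest requested
         * service entry indices in bits 15:8 and 7:0 respectively.
         */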
        case DM_ATTR_SVC_ENTRIES:
                slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
                hi = (u8) ((slot >> 8) & 0xff);
                lo = (u8) (slot & 0xff);
                slot = (u16) ((slot >> 16) & 0xffff);
                srpt_get_svc_entries(slot, hi, lo, rsp_mad);
                break;
        default:
                rsp_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
                break;
        }
}

/*
 * Callback function that is called by the InfiniBand core after transmission
 * of a MAD. (MAD = management datagram; AH = address handle.)
 */
static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
                                  struct ib_mad_send_wc *mad_wc)
{
        ib_destroy_ah(mad_wc->send_buf->ah);
        ib_free_send_mad(mad_wc->send_buf);
}

/*
 * Callback function that is called by the InfiniBand core after reception of
 * a MAD (management datagram).
 */
static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
                                  struct ib_mad_recv_wc *mad_wc)
{
        struct srpt_port *sport = (struct srpt_port *)mad_agent->context;
        struct ib_ah *ah;
        struct ib_mad_send_buf *rsp;
        struct ib_dm_mad *dm_mad;

        if (!mad_wc || !mad_wc->recv_buf.mad)
                return;

        ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc,
                                  mad_wc->recv_buf.grh, mad_agent->port_num);
        if (IS_ERR(ah))
                goto err;

        BUILD_BUG_ON(offsetof(struct ib_dm_mad, data) != IB_MGMT_DEVICE_HDR);

        rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp,
                                 mad_wc->wc->pkey_index, 0,
                                 IB_MGMT_DEVICE_HDR, IB_MGMT_DEVICE_DATA,
                                 GFP_KERNEL);
        if (IS_ERR(rsp))
                goto err_rsp;

        rsp->ah = ah;

        dm_mad = rsp->mad;
        memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof *dm_mad);
        dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
        dm_mad->mad_hdr.status = 0;

        switch (mad_wc->recv_buf.mad->mad_hdr.method) {
        case IB_MGMT_METHOD_GET:
                srpt_mgmt_method_get(sport, mad_wc->recv_buf.mad, dm_mad);
                break;
        case IB_MGMT_METHOD_SET:
                dm_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
                break;
        default:
                dm_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD);
                break;
        }

        if (!ib_post_send_mad(rsp, NULL)) {
                ib_free_recv_mad(mad_wc);
                /* will destroy_ah & free_send_mad in send completion */
                return;
        }

        ib_free_send_mad(rsp);

err_rsp:
        ib_destroy_ah(ah);
err:
        ib_free_recv_mad(mad_wc);
}

/*
 * Enable InfiniBand management datagram processing, update the cached sm_lid,
 * lid and gid values, and register a callback function for processing MADs
 * on the specified port. It is safe to call this function more than once for
 * the same port.
 */
static int srpt_refresh_port(struct srpt_port *sport)
{
        struct ib_mad_reg_req reg_req;
        struct ib_port_modify port_modify;
        struct ib_port_attr port_attr;
        int ret;

        memset(&port_modify, 0, sizeof port_modify);
        port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
        port_modify.clr_port_cap_mask = 0;

        ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
        if (ret)
                goto err_mod_port;

        ret = ib_query_port(sport->sdev->device, sport->port, &port_attr);
        if (ret)
                goto err_query_port;

        sport->sm_lid = port_attr.sm_lid;
        sport->lid = port_attr.lid;

        ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid);
        if (ret)
                goto err_query_port;

        if (!sport->mad_agent) {
                memset(&reg_req, 0, sizeof reg_req);
                reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
                reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION;
                set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
                set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);

                sport->mad_agent = ib_register_mad_agent(sport->sdev->device,
                                                         sport->port,
                                                         IB_QPT_GSI,
                                                         &reg_req, 0,
                                                         srpt_mad_send_handler,
                                                         srpt_mad_recv_handler,
                                                         sport);
                if (IS_ERR(sport->mad_agent)) {
                        ret = PTR_ERR(sport->mad_agent);
                        sport->mad_agent = NULL;
                        goto err_query_port;
                }
        }

        return 0;

err_query_port:

        port_modify.set_port_cap_mask = 0;
        port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
        ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);

err_mod_port:

        return ret;
}

/*
 * Unregister the callback function for processing MADs and disable MAD
 * processing for all ports of the specified device. It is safe to call this
 * function more than once for the same device.
 */
static void srpt_unregister_mad_agent(struct srpt_device *sdev)
{
        struct ib_port_modify port_modify = {
                .clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP,
        };
        struct srpt_port *sport;
        int i;

        for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
                sport = &sdev->port[i - 1];
                WARN_ON(sport->port != i);
                if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0)
                        printk(KERN_ERR PFX "disabling MAD processing"
                               " failed.\n");
                if (sport->mad_agent) {
                        ib_unregister_mad_agent(sport->mad_agent);
                        sport->mad_agent = NULL;
                }
        }
}

/*
 * Allocate and initialize an SRPT I/O context structure.
 */
static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev)
{
        struct srpt_ioctx *ioctx;

        ioctx = kmalloc(sizeof *ioctx, GFP_KERNEL);
        if (!ioctx)
                goto out;

        ioctx->buf = kzalloc(MAX_MESSAGE_SIZE, GFP_KERNEL);
        if (!ioctx->buf)
                goto out_free_ioctx;

        ioctx->dma = dma_map_single(sdev->device->dma_device, ioctx->buf,
                                    MAX_MESSAGE_SIZE, DMA_BIDIRECTIONAL);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)
        if (dma_mapping_error(sdev->device->dma_device, ioctx->dma))
#else
        if (dma_mapping_error(ioctx->dma))
#endif
                goto out_free_buf;

        return ioctx;

out_free_buf:
        kfree(ioctx->buf);
out_free_ioctx:
        kfree(ioctx);
out:
        return NULL;
}

/*
 * Deallocate an SRPT I/O context structure.
 */
static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
{
        if (!ioctx)
                return;

        dma_unmap_single(sdev->device->dma_device, ioctx->dma,
                         MAX_MESSAGE_SIZE, DMA_BIDIRECTIONAL);
        kfree(ioctx->buf);
        kfree(ioctx);
}

/*
 * Associate a ring of SRPT I/O context structures with the specified device.
 */
static int srpt_alloc_ioctx_ring(struct srpt_device *sdev)
{
        int i;

        for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
                sdev->ioctx_ring[i] = srpt_alloc_ioctx(sdev);

                if (!sdev->ioctx_ring[i])
                        goto err;

                sdev->ioctx_ring[i]->index = i;
        }

        return 0;

err:
        while (--i >= 0) {
                srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
                sdev->ioctx_ring[i] = NULL;
        }
        return -ENOMEM;
}

/* Free the ring of SRPT I/O context structures. */
static void srpt_free_ioctx_ring(struct srpt_device *sdev)
{
        int i;

        for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
                srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
                sdev->ioctx_ring[i] = NULL;
        }
}

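/*
 * Note: receive work requests carry the SRPT_OP_RECV flag in their wr_id so
 * that srpt_completion() can tell them apart from send work requests; the
 * flag is masked off again to recover the ioctx ring index.
 */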
/*
 * Post a receive request on the work queue of InfiniBand device 'sdev'.
 */
static int srpt_post_recv(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
{
        struct ib_sge list;
        struct ib_recv_wr wr, *bad_wr;

        wr.wr_id = ioctx->index | SRPT_OP_RECV;

        list.addr = ioctx->dma;
        list.length = MAX_MESSAGE_SIZE;
        list.lkey = sdev->mr->lkey;

        wr.next = NULL;
        wr.sg_list = &list;
        wr.num_sge = 1;

        return ib_post_srq_recv(sdev->srq, &wr, &bad_wr);
}

/*
 * Post a send request on the SRPT RDMA channel 'ch'.
 */
static int srpt_post_send(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
                          int len)
{
        struct ib_sge list;
        struct ib_send_wr wr, *bad_wr;
        struct srpt_device *sdev = ch->sport->sdev;

        dma_sync_single_for_device(sdev->device->dma_device, ioctx->dma,
                                   MAX_MESSAGE_SIZE, DMA_TO_DEVICE);

        list.addr = ioctx->dma;
        list.length = len;
        list.lkey = sdev->mr->lkey;

        wr.next = NULL;
        wr.wr_id = ioctx->index;
        wr.sg_list = &list;
        wr.num_sge = 1;
        wr.opcode = IB_WR_SEND;
        wr.send_flags = IB_SEND_SIGNALED;

        return ib_post_send(ch->qp, &wr, &bad_wr);
}

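/*
 * Parse the data buffer descriptors of SRP_CMD *srp_cmd and initialize the
 * receive buffer list of *ioctx. An SRP command carries either a single
 * direct data buffer descriptor or an indirect descriptor table (see also
 * the T10 SRP r16a document). *ind is set to one when the table announces
 * more descriptors than were transferred with the command itself, in which
 * case the caller responds with TASK SET FULL.
 */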
static int srpt_get_desc_tbl(struct srpt_ioctx *ioctx, struct srp_cmd *srp_cmd,
                             int *ind)
{
        struct srp_indirect_buf *idb;
        struct srp_direct_buf *db;

        *ind = 0;
        if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) ||
            ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) {
                ioctx->n_rbuf = 1;
                ioctx->rbufs = &ioctx->single_rbuf;

                db = (void *)srp_cmd->add_data;
                memcpy(ioctx->rbufs, db, sizeof *db);
                ioctx->data_len = be32_to_cpu(db->len);
        } else {
                idb = (void *)srp_cmd->add_data;

                ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof *db;

                if (ioctx->n_rbuf >
                    (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
                        *ind = 1;
                        ioctx->n_rbuf = 0;
                        goto out;
                }

                if (ioctx->n_rbuf == 1)
                        ioctx->rbufs = &ioctx->single_rbuf;
                else
                        ioctx->rbufs =
                                kmalloc(ioctx->n_rbuf * sizeof *db, GFP_ATOMIC);
                if (!ioctx->rbufs) {
                        ioctx->n_rbuf = 0;
                        return -ENOMEM;
                }

                db = idb->desc_list;
                memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof *db);
                ioctx->data_len = be32_to_cpu(idb->len);
        }
out:
        return 0;
}

/*
 * Modify the attributes of queue pair 'qp': allow local write, remote read,
 * and remote write. Also transition 'qp' to state IB_QPS_INIT.
 */
static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
{
        struct ib_qp_attr *attr;
        int ret;

        attr = kzalloc(sizeof *attr, GFP_KERNEL);
        if (!attr)
                return -ENOMEM;

        attr->qp_state = IB_QPS_INIT;
        attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
            IB_ACCESS_REMOTE_WRITE;
        attr->port_num = ch->sport->port;
        attr->pkey_index = 0;

        ret = ib_modify_qp(qp, attr,
                           IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PORT |
                           IB_QP_PKEY_INDEX);

        kfree(attr);
        return ret;
}

static int srpt_ch_qp_rtr_rts(struct srpt_rdma_ch *ch, struct ib_qp *qp,
                              enum ib_qp_state qp_state)
{
        struct ib_qp_attr *qp_attr;
        int attr_mask;
        int ret;

        qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
        if (!qp_attr)
                return -ENOMEM;

        qp_attr->qp_state = qp_state;
        ret = ib_cm_init_qp_attr(ch->cm_id, qp_attr, &attr_mask);
        if (ret)
                goto out;

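        /*
         * Note: the RDMA read depth of four matches the rdma_read_depth
         * value advertised in the I/O controller profile by srpt_get_ioc().
         */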
        if (qp_state == IB_QPS_RTR)
                qp_attr->max_dest_rd_atomic = 4;
        else
                qp_attr->max_rd_atomic = 4;

        ret = ib_modify_qp(qp, qp_attr, attr_mask);

out:
        kfree(qp_attr);
        return ret;
}

static void srpt_reset_ioctx(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
{
        int i;

        if (ioctx->n_rdma_ius > 0 && ioctx->rdma_ius) {
                struct rdma_iu *riu = ioctx->rdma_ius;

                for (i = 0; i < ioctx->n_rdma_ius; ++i, ++riu)
                        kfree(riu->sge);
                kfree(ioctx->rdma_ius);
        }

        if (ioctx->n_rbuf > 1)
                kfree(ioctx->rbufs);

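        /* Reposting the receive buffer grants the initiator a new credit. */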
        if (srpt_post_recv(ch->sport->sdev, ioctx))
                /* We should queue it back to the free_ioctx queue. */
                printk(KERN_ERR PFX "SRQ post_recv failed - this is serious\n");
        else
                atomic_inc(&ch->req_lim_delta);
}

static void srpt_handle_err_comp(struct srpt_rdma_ch *ch, struct ib_wc *wc)
{
        struct srpt_ioctx *ioctx;
        struct srpt_device *sdev = ch->sport->sdev;
        scst_data_direction dir = SCST_DATA_NONE;

        if (wc->wr_id & SRPT_OP_RECV) {
                ioctx = sdev->ioctx_ring[wc->wr_id & ~SRPT_OP_RECV];
                printk(KERN_ERR PFX "This is serious - SRQ is in bad state\n");
        } else {
                ioctx = sdev->ioctx_ring[wc->wr_id];

                if (ioctx->scmnd) {
                        struct scst_cmd *scmnd = ioctx->scmnd;

                        dir = scst_cmd_get_data_direction(scmnd);

                        if (dir == SCST_DATA_NONE)
                                scst_tgt_cmd_done(scmnd,
                                        scst_estimate_context());
                        else {
                                dma_unmap_sg(sdev->device->dma_device,
                                             scst_cmd_get_sg(scmnd),
                                             scst_cmd_get_sg_cnt(scmnd),
                                             scst_to_tgt_dma_dir(dir));

                                if (scmnd->state == SCST_CMD_STATE_DATA_WAIT)
                                        scst_rx_data(scmnd,
                                                     SCST_RX_STATUS_ERROR,
                                                     SCST_CONTEXT_THREAD);
                                else if (scmnd->state == SCST_CMD_STATE_XMIT_WAIT)
                                        scst_tgt_cmd_done(scmnd,
                                                scst_estimate_context());
                        }
                } else
                        srpt_reset_ioctx(ch, ioctx);
        }
}

static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
                                  struct srpt_ioctx *ioctx,
                                  enum scst_exec_context context)
{
        if (ioctx->scmnd) {
                scst_data_direction dir =
                        scst_cmd_get_data_direction(ioctx->scmnd);

                if (dir != SCST_DATA_NONE)
                        dma_unmap_sg(ch->sport->sdev->device->dma_device,
                                     scst_cmd_get_sg(ioctx->scmnd),
                                     scst_cmd_get_sg_cnt(ioctx->scmnd),
                                     scst_to_tgt_dma_dir(dir));

                scst_tgt_cmd_done(ioctx->scmnd, context);
        } else
                srpt_reset_ioctx(ch, ioctx);
}

static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
                                  struct srpt_ioctx *ioctx)
{
        if (!ioctx->scmnd) {
                srpt_reset_ioctx(ch, ioctx);
                return;
        }

        if (scst_cmd_get_data_direction(ioctx->scmnd) == SCST_DATA_WRITE)
                scst_rx_data(ioctx->scmnd, SCST_RX_STATUS_SUCCESS,
                        scst_estimate_context());
}

static void srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
                               struct srpt_ioctx *ioctx, u8 s_key, u8 s_code,
                               u64 tag)
{
        struct srp_rsp *srp_rsp;
        struct sense_data *sense;
        int limit_delta;

        srp_rsp = ioctx->buf;
        memset(srp_rsp, 0, sizeof *srp_rsp);

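        /*
         * Hand the flow control credits accumulated since the last response
         * back to the initiator. Reading the counter first and subtracting
         * exactly that value afterwards preserves increments that race with
         * this code for a later response.
         */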
        limit_delta = atomic_read(&ch->req_lim_delta);
        atomic_sub(limit_delta, &ch->req_lim_delta);

        srp_rsp->opcode = SRP_RSP;
        srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
        srp_rsp->tag = tag;

        if (s_key != NO_SENSE) {
                srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
                srp_rsp->status = SAM_STAT_CHECK_CONDITION;
                srp_rsp->sense_data_len =
                    cpu_to_be32(sizeof *sense + (sizeof *sense % 4));

                sense = (struct sense_data *)(srp_rsp + 1);
                sense->err_code = 0x70;
                sense->key = s_key;
                sense->asc_ascq = s_code;
        }
}

static void srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
                                   struct srpt_ioctx *ioctx, u8 rsp_code,
                                   u64 tag)
{
        struct srp_rsp *srp_rsp;
        int limit_delta;

        dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
                                MAX_MESSAGE_SIZE, DMA_TO_DEVICE);

        srp_rsp = ioctx->buf;
        memset(srp_rsp, 0, sizeof *srp_rsp);

        limit_delta = atomic_read(&ch->req_lim_delta);
        atomic_sub(limit_delta, &ch->req_lim_delta);

        srp_rsp->opcode = SRP_RSP;
        srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
        srp_rsp->tag = tag;

        if (rsp_code != SRP_TSK_MGMT_SUCCESS) {
                srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID;
                srp_rsp->resp_data_len = cpu_to_be32(4);
                srp_rsp->data[3] = rsp_code;
        }
}

/*
 * Process SRP_CMD.
 */
static int srpt_handle_cmd(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
{
        struct scst_cmd *scmnd = NULL;
        struct srp_cmd *srp_cmd = NULL;
        scst_data_direction dir = SCST_DATA_NONE;
        int indirect_desc = 0;
        int ret;
        unsigned long flags;

        srp_cmd = ioctx->buf;

        if (srp_cmd->buf_fmt) {
                ret = srpt_get_desc_tbl(ioctx, srp_cmd, &indirect_desc);
                if (ret) {
                        srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
                                           NO_ADD_SENSE, srp_cmd->tag);
                        ((struct srp_rsp *)ioctx->buf)->status =
                                        SAM_STAT_TASK_SET_FULL;
                        goto send_rsp;
                }

                if (indirect_desc) {
                        srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
                                           NO_ADD_SENSE, srp_cmd->tag);
                        ((struct srp_rsp *)ioctx->buf)->status =
                                        SAM_STAT_TASK_SET_FULL;
                        goto send_rsp;
                }

                if (srp_cmd->buf_fmt & 0xf)
                        dir = SCST_DATA_READ;
                else if (srp_cmd->buf_fmt >> 4)
                        dir = SCST_DATA_WRITE;
                else
                        dir = SCST_DATA_NONE;
        } else
                dir = SCST_DATA_NONE;

        scmnd = scst_rx_cmd(ch->scst_sess, (u8 *) &srp_cmd->lun,
                            sizeof srp_cmd->lun, srp_cmd->cdb, 16,
                            thread ? SCST_NON_ATOMIC : SCST_ATOMIC);
        if (!scmnd) {
                srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
                                   NO_ADD_SENSE, srp_cmd->tag);
                ((struct srp_rsp *)ioctx->buf)->status =
                        SAM_STAT_TASK_SET_FULL;
                goto send_rsp;
        }

        ioctx->scmnd = scmnd;

        switch (srp_cmd->task_attr) {
        case SRP_CMD_HEAD_OF_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_HEAD_OF_QUEUE;
                break;
        case SRP_CMD_ORDERED_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
                break;
        case SRP_CMD_SIMPLE_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_SIMPLE;
                break;
        case SRP_CMD_ACA:
                scmnd->queue_type = SCST_CMD_QUEUE_ACA;
                break;
        default:
                scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
                break;
        }

        scst_cmd_set_tag(scmnd, srp_cmd->tag);
        scst_cmd_set_tgt_priv(scmnd, ioctx);
        scst_cmd_set_expected(scmnd, dir, ioctx->data_len);

        spin_lock_irqsave(&ch->spinlock, flags);
        list_add_tail(&ioctx->scmnd_list, &ch->active_scmnd_list);
        ch->active_scmnd_cnt++;
        spin_unlock_irqrestore(&ch->spinlock, flags);

        scst_cmd_init_done(scmnd, scst_estimate_context());

        return 0;

send_rsp:
        return -1;
}

/*
 * Process SRP_TSK_MGMT. See also table 19 in the T10 SRP r16a document.
 */
static int srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
                                struct srpt_ioctx *ioctx)
{
        struct srp_tsk_mgmt *srp_tsk = NULL;
        struct srpt_mgmt_ioctx *mgmt_ioctx;
        int ret;

        srp_tsk = ioctx->buf;

        printk(KERN_WARNING PFX
               "recv_tsk_mgmt= %d for task_tag= %lld"
               " using tag= %lld cm_id= %p sess= %p\n",
               srp_tsk->tsk_mgmt_func,
               (unsigned long long) srp_tsk->task_tag,
               (unsigned long long) srp_tsk->tag,
               ch->cm_id, ch->scst_sess);

        mgmt_ioctx = kmalloc(sizeof *mgmt_ioctx, GFP_ATOMIC);
        if (!mgmt_ioctx) {
                srpt_build_tskmgmt_rsp(ch, ioctx, SRP_TSK_MGMT_FAILED,
                                       srp_tsk->tag);
                goto send_rsp;
        }

        mgmt_ioctx->ioctx = ioctx;
        mgmt_ioctx->ch = ch;
        mgmt_ioctx->tag = srp_tsk->tag;

        switch (srp_tsk->tsk_mgmt_func) {
        case SRP_TSK_ABORT_TASK:
                ret = scst_rx_mgmt_fn_tag(ch->scst_sess,
                                          SCST_ABORT_TASK,
                                          srp_tsk->task_tag,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_ABORT_TASK_SET:
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_ABORT_TASK_SET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_CLEAR_TASK_SET:
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_CLEAR_TASK_SET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
#if 0
        case SRP_TSK_LUN_RESET:
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_LUN_RESET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
#endif
        case SRP_TSK_CLEAR_ACA:
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_CLEAR_ACA,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        default:
                srpt_build_tskmgmt_rsp(ch, ioctx,
                                       SRP_TSK_MGMT_FUNC_NOT_SUPP,
                                       srp_tsk->tag);
                goto send_rsp;
        }
        return 0;

send_rsp:
        return -1;
}

static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
                               struct srpt_ioctx *ioctx)
{
        u8 op;
        unsigned long flags;

        if (ch->state != RDMA_CHANNEL_LIVE) {
                if (ch->state == RDMA_CHANNEL_CONNECTING) {
                        spin_lock_irqsave(&ch->spinlock, flags);
                        list_add_tail(&ioctx->wait_list, &ch->cmd_wait_list);
                        spin_unlock_irqrestore(&ch->spinlock, flags);
                } else
                        srpt_reset_ioctx(ch, ioctx);

                return;
        }

        dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
                                MAX_MESSAGE_SIZE, DMA_FROM_DEVICE);

        ioctx->data_len = 0;
        ioctx->n_rbuf = 0;
        ioctx->rbufs = NULL;
        ioctx->n_rdma = 0;
        ioctx->n_rdma_ius = 0;
        ioctx->rdma_ius = NULL;
        ioctx->scmnd = NULL;

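        /* The first byte of every SRP information unit holds its opcode. */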
        op = *(u8 *) ioctx->buf;
        switch (op) {
        case SRP_CMD:
                if (srpt_handle_cmd(ch, ioctx) < 0)
                        goto send_rsp;
                break;

        case SRP_TSK_MGMT:
                if (srpt_handle_tsk_mgmt(ch, ioctx) < 0)
                        goto send_rsp;
                break;

        case SRP_I_LOGOUT:
        case SRP_AER_REQ:
        default:
                srpt_build_cmd_rsp(ch, ioctx, ILLEGAL_REQUEST, INVALID_CDB,
                                   ((struct srp_cmd *)ioctx->buf)->tag);

                goto send_rsp;
        }

        dma_sync_single_for_device(ch->sport->sdev->device->dma_device,
                                   ioctx->dma, MAX_MESSAGE_SIZE,
                                   DMA_FROM_DEVICE);

        return;

send_rsp:
        if (ch->state != RDMA_CHANNEL_LIVE ||
            srpt_post_send(ch, ioctx,
                           sizeof(struct srp_rsp) +
                           be32_to_cpu(((struct srp_rsp *)ioctx->buf)->
                                       sense_data_len)))
                srpt_reset_ioctx(ch, ioctx);
}

/*
 * Returns true if the ioctx list is non-empty or if the ib_srpt kernel thread
 * should stop.
 * @pre thread != 0
 */
static inline int srpt_test_ioctx_list(void)
{
        int res = (!list_empty(&srpt_thread.thread_ioctx_list) ||
                   unlikely(kthread_should_stop()));
        return res;
}

/*
 * Add 'ioctx' to the tail of the ioctx list and wake up the kernel thread.
 *
 * @pre thread != 0
 */
static inline void srpt_schedule_thread(struct srpt_ioctx *ioctx)
{
        unsigned long flags;

        spin_lock_irqsave(&srpt_thread.thread_lock, flags);
        list_add_tail(&ioctx->comp_list, &srpt_thread.thread_ioctx_list);
        spin_unlock_irqrestore(&srpt_thread.thread_lock, flags);
        wake_up(&ioctx_list_waitQ);
}

/*
 * InfiniBand completion queue callback: called by the InfiniBand core when
 * a work completion is available on completion queue 'cq'.
 */
static void srpt_completion(struct ib_cq *cq, void *ctx)
{
        struct srpt_rdma_ch *ch = ctx;
        struct srpt_device *sdev = ch->sport->sdev;
        struct ib_wc wc;
        struct srpt_ioctx *ioctx;

        ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
        while (ib_poll_cq(ch->cq, 1, &wc) > 0) {
                if (wc.status) {
                        printk(KERN_ERR PFX "failed %s status= %d\n",
                               wc.wr_id & SRPT_OP_RECV ? "receive" : "send",
                               wc.status);
                        srpt_handle_err_comp(ch, &wc);
                        break;
                }

                if (wc.wr_id & SRPT_OP_RECV) {
                        ioctx = sdev->ioctx_ring[wc.wr_id & ~SRPT_OP_RECV];
                        if (thread) {
                                ioctx->ch = ch;
                                ioctx->op = IB_WC_RECV;
                                srpt_schedule_thread(ioctx);
                        } else
                                srpt_handle_new_iu(ch, ioctx);
                        continue;
                } else
                        ioctx = sdev->ioctx_ring[wc.wr_id];

                if (thread) {
                        ioctx->ch = ch;
                        ioctx->op = wc.opcode;
                        srpt_schedule_thread(ioctx);
                } else {
                        switch (wc.opcode) {
                        case IB_WC_SEND:
                                srpt_handle_send_comp(ch, ioctx,
                                        scst_estimate_context());
                                break;
                        case IB_WC_RDMA_WRITE:
                        case IB_WC_RDMA_READ:
                                srpt_handle_rdma_comp(ch, ioctx);
                                break;
                        default:
                                break;
                        }
                }
        }
}

/*
 * Create a completion queue and a queue pair for RDMA channel 'ch' and
 * transition the queue pair to the INIT state.
 */
static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
{
        struct ib_qp_init_attr *qp_init;
        struct srpt_device *sdev = ch->sport->sdev;
        int cqe;
        int ret;

        qp_init = kzalloc(sizeof *qp_init, GFP_KERNEL);
        if (!qp_init)
                return -ENOMEM;

        /* Create a completion queue (CQ). */

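        /*
         * Note: a single CQ is used for both the send and the receive queue
         * of the channel, hence its size covers both queues.
         */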
        cqe = SRPT_RQ_SIZE + SRPT_SQ_SIZE - 1;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(RHEL_RELEASE_CODE)
        ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe);
#else
        ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe, 0);
#endif
        if (IS_ERR(ch->cq)) {
                ret = PTR_ERR(ch->cq);
                printk(KERN_ERR PFX "failed to create_cq cqe= %d ret= %d\n",
                        cqe, ret);
                goto out;
        }

        /* Request completion notification. */

        ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);

        /* Create a queue pair (QP). */

        qp_init->qp_context = (void *)ch;
        qp_init->event_handler = srpt_qp_event;
        qp_init->send_cq = ch->cq;
        qp_init->recv_cq = ch->cq;
        qp_init->srq = sdev->srq;
        qp_init->sq_sig_type = IB_SIGNAL_REQ_WR;
        qp_init->qp_type = IB_QPT_RC;
        qp_init->cap.max_send_wr = SRPT_SQ_SIZE;
        qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE;

        ch->qp = ib_create_qp(sdev->pd, qp_init);
        if (IS_ERR(ch->qp)) {
                ret = PTR_ERR(ch->qp);
                ib_destroy_cq(ch->cq);
                printk(KERN_ERR PFX "failed to create_qp ret= %d\n", ret);
                goto out;
        }

        printk(KERN_DEBUG PFX "%s: max_cqe= %d max_sge= %d cm_id= %p\n",
               __func__, ch->cq->cqe, qp_init->cap.max_send_sge,
               ch->cm_id);

        /* Modify the attributes and the state of queue pair ch->qp. */

        ret = srpt_init_ch_qp(ch, ch->qp);
        if (ret) {
                ib_destroy_qp(ch->qp);
                ib_destroy_cq(ch->cq);
                goto out;
        }

        atomic_set(&ch->req_lim_delta, SRPT_RQ_SIZE);
out:
        kfree(qp_init);
        return ret;
}

static struct srpt_rdma_ch *srpt_find_channel(struct ib_cm_id *cm_id)
{
        struct srpt_device *sdev = cm_id->context;
        struct srpt_rdma_ch *ch, *tmp_ch;

        spin_lock_irq(&sdev->spinlock);
        list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
                if (ch->cm_id == cm_id) {
                        spin_unlock_irq(&sdev->spinlock);
                        return ch;
                }
        }

        spin_unlock_irq(&sdev->spinlock);

        return NULL;
}

static int srpt_release_channel(struct srpt_rdma_ch *ch, int destroy_cmid)
{
        TRACE_ENTRY();

        spin_lock_irq(&ch->sport->sdev->spinlock);
        list_del(&ch->list);
        spin_unlock_irq(&ch->sport->sdev->spinlock);

        if (ch->cm_id && destroy_cmid) {
                printk(KERN_WARNING PFX
                       "%s: destroy cm_id= %p\n", __func__, ch->cm_id);
                ib_destroy_cm_id(ch->cm_id);
                ch->cm_id = NULL;
        }

        ib_destroy_qp(ch->qp);
        ib_destroy_cq(ch->cq);

        if (ch->scst_sess) {
                struct srpt_ioctx *ioctx, *ioctx_tmp;

                printk(KERN_WARNING PFX
                       "%s: release sess= %p sess_name= %s active_cmd= %d\n",
                       __func__, ch->scst_sess, ch->sess_name,
                       ch->active_scmnd_cnt);

                list_for_each_entry_safe(ioctx, ioctx_tmp,
                                         &ch->active_scmnd_list, scmnd_list) {
                        list_del(&ioctx->scmnd_list);
                        ch->active_scmnd_cnt--;
                }

                scst_unregister_session(ch->scst_sess, 0, NULL);
                ch->scst_sess = NULL;
        }

        kfree(ch);

        TRACE_EXIT_RES(!destroy_cmid);

        return destroy_cmid ? 0 : 1;
}

static int srpt_disconnect_channel(struct srpt_rdma_ch *ch, int dreq)
{
        spin_lock_irq(&ch->spinlock);
        ch->state = RDMA_CHANNEL_DISCONNECTING;
        spin_unlock_irq(&ch->spinlock);

        if (dreq)
                ib_send_cm_dreq(ch->cm_id, NULL, 0);
        else
                ib_send_cm_drep(ch->cm_id, NULL, 0);

        return 0;
}

static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
                            struct ib_cm_req_event_param *param,
                            void *private_data)
{
        struct srpt_device *sdev = cm_id->context;
        struct srp_login_req *req;
        struct srp_login_rsp *rsp;
        struct srp_login_rej *rej;
        struct ib_cm_rep_param *rep_param;
        struct srpt_rdma_ch *ch, *tmp_ch;
        u32 it_iu_len;
        int ret = 0;

        if (!sdev || !private_data)
                return -EINVAL;

        rsp = kzalloc(sizeof *rsp, GFP_KERNEL);
        rej = kzalloc(sizeof *rej, GFP_KERNEL);
        rep_param = kzalloc(sizeof *rep_param, GFP_KERNEL);

        if (!rsp || !rej || !rep_param) {
                ret = -ENOMEM;
                goto out;
        }

        req = (struct srp_login_req *)private_data;

        it_iu_len = be32_to_cpu(req->req_it_iu_len);

        printk(KERN_DEBUG PFX
               "Host login i_port_id=0x%llx:0x%llx t_port_id=0x%llx:0x%llx"
               " it_iu_len=%d\n",
               (unsigned long long)
               be64_to_cpu(*(u64 *)&req->initiator_port_id[0]),
               (unsigned long long)
               be64_to_cpu(*(u64 *)&req->initiator_port_id[8]),
               (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[0]),
               (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[8]),
               it_iu_len);

        if (it_iu_len > MAX_MESSAGE_SIZE || it_iu_len < 64) {
                rej->reason =
                    cpu_to_be32(SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
                ret = -EINVAL;
                printk(KERN_WARNING PFX
                       "Reject invalid it_iu_len=%d\n", it_iu_len);
                goto reject;
        }

        if ((req->req_flags & 0x3) == SRP_MULTICHAN_SINGLE) {
                rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;

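                /*
                 * A multi-channel action of "single" (SRP_MULTICHAN_SINGLE)
                 * asks the target to terminate any existing channels between
                 * the same initiator and target port pair before the new
                 * channel is established.
                 */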
1459                 spin_lock_irq(&sdev->spinlock);
1460
1461                 list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
1462                         if (!memcmp(ch->i_port_id, req->initiator_port_id, 16)
1463                             && !memcmp(ch->t_port_id, req->target_port_id, 16)
1464                             && param->port == ch->sport->port
1465                             && param->listen_id == ch->sport->sdev->cm_id
1466                             && ch->cm_id) {
1467                                 /* found an existing channel */
1468                                 printk(KERN_WARNING PFX
1469                                        "Found existing channel name= %s"
1470                                        " cm_id= %p state= %d\n",
1471                                        ch->sess_name, ch->cm_id, ch->state);
1472
1473                                 spin_unlock_irq(&sdev->spinlock);
1474
1475                                 rsp->rsp_flags =
1476                                     SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
1477
1478                                 if (ch->state == RDMA_CHANNEL_LIVE)
1479                                         srpt_disconnect_channel(ch, 1);
1480                                 else if (ch->state == RDMA_CHANNEL_CONNECTING) {
1481                                         ib_send_cm_rej(ch->cm_id,
1482                                                        IB_CM_REJ_NO_RESOURCES,
1483                                                        NULL, 0, NULL, 0);
1484                                         srpt_release_channel(ch, 1);
1485                                 }
1486
1487                                 spin_lock_irq(&sdev->spinlock);
1488                         }
1489                 }
1490
1491                 spin_unlock_irq(&sdev->spinlock);
1492
1493         } else
1494                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
1495
1496         if (((u64) (*(u64 *) req->target_port_id) !=
1497              cpu_to_be64(mellanox_ioc_guid)) ||
1498             ((u64) (*(u64 *) (req->target_port_id + 8)) !=
1499              cpu_to_be64(mellanox_ioc_guid))) {
1500                 rej->reason =
1501                     cpu_to_be32(SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
1502                 ret = -EINVAL;
1503                 printk(KERN_WARNING PFX "Rejecting login: invalid target_port_id\n");
1504                 goto reject;
1505         }
1506
1507         ch = kzalloc(sizeof *ch, GFP_KERNEL);
1508         if (!ch) {
1509                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1510                 printk(KERN_WARNING PFX "Rejecting login: failed to allocate rdma_ch\n");
1511                 ret = -ENOMEM;
1512                 goto reject;
1513         }
1514
1515         spin_lock_init(&ch->spinlock);
1516         memcpy(ch->i_port_id, req->initiator_port_id, 16);
1517         memcpy(ch->t_port_id, req->target_port_id, 16);
1518         ch->sport = &sdev->port[param->port - 1];
1519         ch->cm_id = cm_id;
1520         ch->state = RDMA_CHANNEL_CONNECTING;
1521         INIT_LIST_HEAD(&ch->cmd_wait_list);
1522         INIT_LIST_HEAD(&ch->active_scmnd_list);
1523
1524         ret = srpt_create_ch_ib(ch);
1525         if (ret) {
1526                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1527                 printk(KERN_WARNING PFX "Rejecting login: srpt_create_ch_ib() failed\n");
1528                 goto free_ch;
1529         }
1530
1531         ret = srpt_ch_qp_rtr_rts(ch, ch->qp, IB_QPS_RTR);
1532         if (ret) {
1533                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1534                 printk(KERN_WARNING PFX
1535                        "Rejecting login: QP transition to RTR failed (ret=%d)\n", ret);
1536                 goto destroy_ib;
1537         }
1538
1539         snprintf(ch->sess_name, sizeof(ch->sess_name),
1540                  "0x%016llx%016llx",
1541                  (unsigned long long)be64_to_cpu(*(u64 *)ch->i_port_id),
1542                  (unsigned long long)be64_to_cpu(*(u64 *)(ch->i_port_id + 8)));
1543
1544         TRACE_DBG("registering session %s", ch->sess_name);
1545
1546         BUG_ON(!sdev->scst_tgt);
1547         ch->scst_sess = scst_register_session(sdev->scst_tgt, 0, ch->sess_name,
1548                                   NULL, NULL);
1549         if (!ch->scst_sess) {
1550                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1551                 printk(KERN_WARNING PFX "Failed to create scst sess\n");
                     ret = -ENOMEM;
1552                 goto destroy_ib;
1553         }
1554
1555         spin_lock_irq(&sdev->spinlock);
1556         list_add_tail(&ch->list, &sdev->rch_list);
1557         spin_unlock_irq(&sdev->spinlock);
1558
1559         printk(KERN_DEBUG PFX "Establish connection sess=%p name=%s cm_id=%p\n",
1560                ch->scst_sess, ch->sess_name, ch->cm_id);
1561
1562         scst_sess_set_tgt_priv(ch->scst_sess, ch);
1563
1564         /* create srp_login_response */
1565         rsp->opcode = SRP_LOGIN_RSP;
1566         rsp->tag = req->tag;
1567         rsp->max_it_iu_len = req->req_it_iu_len;
1568         rsp->max_ti_iu_len = req->req_it_iu_len;
1569         rsp->buf_fmt =
1570             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
1571         rsp->req_lim_delta = cpu_to_be32(SRPT_RQ_SIZE);
1572         atomic_set(&ch->req_lim_delta, 0);
1573
1574         /* create cm reply */
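             /*
              * An rnr_retry_count of 7 requests an unlimited number of
              * retries after receiver-not-ready NAKs, and responder_resources
              * / initiator_depth bound the number of RDMA read operations
              * that may be outstanding on this connection.
              */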
1575         rep_param->qp_num = ch->qp->qp_num;
1576         rep_param->private_data = (void *)rsp;
1577         rep_param->private_data_len = sizeof *rsp;
1578         rep_param->rnr_retry_count = 7;
1579         rep_param->flow_control = 1;
1580         rep_param->failover_accepted = 0;
1581         rep_param->srq = 1;
1582         rep_param->responder_resources = 4;
1583         rep_param->initiator_depth = 4;
1584
1585         ret = ib_send_cm_rep(cm_id, rep_param);
1586         if (ret)
1587                 srpt_release_channel(ch, 0);
1588
1589         goto out;
1590
1591 destroy_ib:
1592         ib_destroy_qp(ch->qp);
1593         ib_destroy_cq(ch->cq);
1594
1595 free_ch:
1596         kfree(ch);
1597
1598 reject:
1599         rej->opcode = SRP_LOGIN_REJ;
1600         rej->tag = req->tag;
1601         rej->buf_fmt =
1602             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
1603
1604         ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
1605                              (void *)rej, sizeof *rej);
1606
1607 out:
1608         kfree(rep_param);
1609         kfree(rsp);
1610         kfree(rej);
1611
1612         return ret;
1613 }
1614
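     /*
      * Look up the channel associated with 'cm_id' and, if found, release
      * it. Shared by the REJ, DREP, TIMEWAIT_EXIT and REP_ERROR handlers
      * below.
      */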
1615 static int srpt_find_and_release_channel(struct ib_cm_id *cm_id)
1616 {
1617         struct srpt_rdma_ch *ch;
1618
1619         ch = srpt_find_channel(cm_id);
1620         if (!ch)
1621                 return -EINVAL;
1622
1623         return srpt_release_channel(ch, 0);
1624 }
1625
1626 static int srpt_cm_rej_recv(struct ib_cm_id *cm_id)
1627 {
1628         printk(KERN_DEBUG PFX "%s: cm_id=%p\n", __func__, cm_id);
1629         return srpt_find_and_release_channel(cm_id);
1630 }
1631
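     /*
      * Process an IB CM ready-to-use (RTU) event: mark the channel live,
      * move its queue pair to the RTS state and handle the information
      * units that arrived while the channel was still connecting.
      */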
1632 static int srpt_cm_rtu_recv(struct ib_cm_id *cm_id)
1633 {
1634         struct srpt_rdma_ch *ch;
1635         int ret;
1636
1637         ch = srpt_find_channel(cm_id);
1638         if (!ch)
1639                 return -EINVAL;
1640
1641         if (ch->state == RDMA_CHANNEL_CONNECTING) {
1642                 struct srpt_ioctx *ioctx, *ioctx_tmp;
1643
1644                 spin_lock_irq(&ch->spinlock);
1645                 ch->state = RDMA_CHANNEL_LIVE;
1646                 spin_unlock_irq(&ch->spinlock);
1647                 ret = srpt_ch_qp_rtr_rts(ch, ch->qp, IB_QPS_RTS);
1648
1649                 list_for_each_entry_safe(ioctx, ioctx_tmp, &ch->cmd_wait_list,
1650                                          wait_list) {
1651                         list_del(&ioctx->wait_list);
1652                         srpt_handle_new_iu(ch, ioctx);
1653                 }
1654         } else if (ch->state == RDMA_CHANNEL_DISCONNECTING)
1655                 ret = -EAGAIN;
1656         else
1657                 ret = 0;
1658
1659         if (ret) {
1660                 printk(KERN_ERR PFX "cm_id=%p sess_name=%s state=%d\n",
1661                        cm_id, ch->sess_name, ch->state);
1662                 srpt_disconnect_channel(ch, 1);
1663         }
1664
1665         return ret;
1666 }
1667
1668 static int srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
1669 {
1670         printk(KERN_DEBUG PFX "%s: cm_id=%p\n", __func__, cm_id);
1671         return srpt_find_and_release_channel(cm_id);
1672 }
1673
1674 static int srpt_cm_rep_error(struct ib_cm_id *cm_id)
1675 {
1676         printk(KERN_DEBUG PFX "%s: cm_id=%p\n", __func__, cm_id);
1677         return srpt_find_and_release_channel(cm_id);
1678 }
1679
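     /*
      * Process an IB CM disconnect request (DREQ): start disconnecting the
      * channel unless a disconnect is already in progress.
      */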
1680 static int srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
1681 {
1682         struct srpt_rdma_ch *ch;
1683         int ret = 0;
1684
1685         ch = srpt_find_channel(cm_id);
1686
1687         if (!ch)
1688                 return -EINVAL;
1689
1690         printk(KERN_DEBUG PFX "%s: cm_id= %p ch->state= %d\n",
1691                  __func__, cm_id, ch->state);
1692
1693         switch (ch->state) {
1694         case RDMA_CHANNEL_LIVE:
1695         case RDMA_CHANNEL_CONNECTING:
1696                 ret = srpt_disconnect_channel(ch, 0);
1697                 break;
1698         case RDMA_CHANNEL_DISCONNECTING:
1699         default:
1700                 break;
1701         }
1702
1703         return ret;
1704 }
1705
1706 static int srpt_cm_drep_recv(struct ib_cm_id *cm_id)
1707 {
1708         printk(KERN_DEBUG PFX "%s: cm_id=%p\n", __func__, cm_id);
1709         return srpt_find_and_release_channel(cm_id);
1710 }
1711
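     /*
      * IB CM callback: dispatches connection manager events to the
      * srpt_cm_* handlers above. Note: if this function returns a non-zero
      * value, the IB CM core will destroy 'cm_id'.
      */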
1712 static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
1713 {
1714         int ret = 0;
1715
1716         switch (event->event) {
1717         case IB_CM_REQ_RECEIVED:
1718                 ret = srpt_cm_req_recv(cm_id, &event->param.req_rcvd,
1719                                        event->private_data);
1720                 break;
1721         case IB_CM_REJ_RECEIVED:
1722                 ret = srpt_cm_rej_recv(cm_id);
1723                 break;
1724         case IB_CM_RTU_RECEIVED:
1725         case IB_CM_USER_ESTABLISHED:
1726                 ret = srpt_cm_rtu_recv(cm_id);
1727                 break;
1728         case IB_CM_DREQ_RECEIVED:
1729                 ret = srpt_cm_dreq_recv(cm_id);
1730                 break;
1731         case IB_CM_DREP_RECEIVED:
1732                 ret = srpt_cm_drep_recv(cm_id);
1733                 break;
1734         case IB_CM_TIMEWAIT_EXIT:
1735                 ret = srpt_cm_timewait_exit(cm_id);
1736                 break;
1737         case IB_CM_REP_ERROR:
1738                 ret = srpt_cm_rep_error(cm_id);
1739                 break;
1740         default:
1741                 break;
1742         }
1743
1744         return ret;
1745 }
1746
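     /*
      * Map the scatter-gather list of 'scmnd' onto the ib_sge arrays of
      * 'ioctx'. This runs in two passes: the first pass computes, for each
      * remote descriptor, how many ib_sge entries and RDMA work requests
      * are needed and allocates them; the second pass fills in the DMA
      * addresses and lengths. Allocates with GFP_ATOMIC when the command
      * must be processed atomically.
      */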
1747 static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1748                                  struct srpt_ioctx *ioctx,
1749                                  struct scst_cmd *scmnd)
1750 {
1751         struct scatterlist *scat;
1752         scst_data_direction dir;
1753         struct rdma_iu *riu;
1754         struct srp_direct_buf *db;
1755         dma_addr_t dma_addr;
1756         struct ib_sge *sge;
1757         u64 raddr;
1758         u32 rsize;
1759         u32 tsize;
1760         u32 dma_len;
1761         int count, nrdma;
1762         int i, j, k;
1763
1764         scat = scst_cmd_get_sg(scmnd);
1765         dir = scst_cmd_get_data_direction(scmnd);
1766         count = dma_map_sg(ch->sport->sdev->device->dma_device, scat,
1767                            scst_cmd_get_sg_cnt(scmnd),
1768                            scst_to_tgt_dma_dir(dir));
1769         if (unlikely(!count))
1770                 return -EBUSY;
1771
1772         if (ioctx->rdma_ius && ioctx->n_rdma_ius)
1773                 nrdma = ioctx->n_rdma_ius;
1774         else {
                     /* Round up: partially filled work requests count too. */
1775                 nrdma = (count + SRPT_DEF_SG_PER_WQE - 1) / SRPT_DEF_SG_PER_WQE
                             + ioctx->n_rbuf;
1776
1777                 ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu,
1778                                           scst_cmd_atomic(scmnd)
1779                                           ? GFP_ATOMIC : GFP_KERNEL);
1780                 if (!ioctx->rdma_ius) {
1781                         dma_unmap_sg(ch->sport->sdev->device->dma_device,
1782                                      scat, scst_cmd_get_sg_cnt(scmnd),
1783                                      scst_to_tgt_dma_dir(dir));
1784                         return -ENOMEM;
1785                 }
1786
1787                 ioctx->n_rdma_ius = nrdma;
1788         }
1789
1790         db = ioctx->rbufs;
1791         tsize = (dir == SCST_DATA_READ) ?
1792                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
1793         dma_len = sg_dma_len(&scat[0]);
1794         riu = ioctx->rdma_ius;
1795
1796         /*
1797          * First pass: compute the number of ib_sge entries needed for each
1798          * remote descriptor. If a descriptor needs at most
1799          * SRPT_DEF_SG_PER_WQE ib_sge entries, a single rdma_iu (one RDMA
1800          * work request) suffices; otherwise additional rdma_iu entries are
1801          * allocated to carry the excess ib_sge entries in further RDMA
1802          * work requests.
1803          */
1804         for (i = 0, j = 0;
1805              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
1806                 rsize = be32_to_cpu(db->len);
1807                 raddr = be64_to_cpu(db->va);
1808                 riu->raddr = raddr;
1809                 riu->rkey = be32_to_cpu(db->key);
1810                 riu->sge_cnt = 0;
1811
1812                 /* Calculate how many ib_sge entries this remote buffer needs. */
1813                 while (rsize > 0 && tsize > 0) {
1814
1815                         if (rsize >= dma_len) {
1816                                 tsize -= dma_len;
1817                                 rsize -= dma_len;
1818                                 raddr += dma_len;
1819
1820                                 if (tsize > 0) {
1821                                         ++j;
1822                                         if (j < count)
1823                                                 dma_len = sg_dma_len(&scat[j]);
1824                                 }
1825                         } else {
1826                                 tsize -= rsize;
1827                                 dma_len -= rsize;
1828                                 rsize = 0;
1829                         }
1830
1831                         ++riu->sge_cnt;
1832
1833                         if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) {
1834                                 riu->sge =
1835                                     kmalloc(riu->sge_cnt * sizeof *riu->sge,
1836                                             scst_cmd_atomic(scmnd)
1837                                             ? GFP_ATOMIC : GFP_KERNEL);
1838                                 if (!riu->sge)
1839                                         goto free_mem;
1840
1841                                 ++ioctx->n_rdma;
1842                                 ++riu;
1843                                 riu->sge_cnt = 0;
1844                                 riu->raddr = raddr;
1845                                 riu->rkey = be32_to_cpu(db->key);
1846                         }
1847                 }
1848
1849                 riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge,
1850                                    scst_cmd_atomic(scmnd)
1851                                    ? GFP_ATOMIC : GFP_KERNEL);
1852
1853                 if (!riu->sge)
1854                         goto free_mem;
1855
1856                 ++ioctx->n_rdma;
1857         }
1858
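             /*
              * Worked example (illustrative numbers, assuming
              * SRPT_DEF_SG_PER_WQE >= 3): three DMA segments of 4 KiB each
              * (count == 3) covered by a single 12 KiB remote descriptor
              * yield one rdma_iu with sge_cnt == 3; the second pass below
              * then fills in the three ib_sge entries with the segments'
              * DMA addresses and 4 KiB lengths.
              */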
1859         db = ioctx->rbufs;
1860         scat = scst_cmd_get_sg(scmnd);
1861         tsize = (dir == SCST_DATA_READ) ?
1862                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
1863         riu = ioctx->rdma_ius;
1864         dma_len = sg_dma_len(&scat[0]);
1865         dma_addr = sg_dma_address(&scat[0]);
1866
1867         /* Second pass: map the sg DMA addresses onto the rdma_iu->ib_sge arrays. */
1868         for (i = 0, j = 0;
1869              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
1870                 rsize = be32_to_cpu(db->len);
1871                 sge = riu->sge;
1872                 k = 0;
1873
1874                 while (rsize > 0 && tsize > 0) {
1875                         sge->addr = dma_addr;
1876                         sge->lkey = ch->sport->sdev->mr->lkey;
1877
1878                         if (rsize >= dma_len) {
1879                                 sge->length =
1880                                         (tsize < dma_len) ? tsize : dma_len;
1881                                 tsize -= dma_len;
1882                                 rsize -= dma_len;
1883
1884                                 if (tsize > 0) {
1885                                         ++j;
1886                                         if (j < count) {
1887                                                 dma_len = sg_dma_len(&scat[j]);
1888                                                 dma_addr =
1889                                                     sg_dma_address(&scat[j]);
1890                                         }
1891                                 }
1892                         } else {
1893                                 sge->length = (tsize < rsize) ? tsize : rsize;
1894                                 tsize -= rsize;
1895                                 dma_len -= rsize;
1896                                 dma_addr += rsize;
1897                                 rsize = 0;
1898                         }
1899
1900                         ++k;
1901                         if (k == riu->sge_cnt && rsize > 0) {
1902                                 ++riu;
1903                                 sge = riu->sge;
1904                                 k = 0;
1905                         } else if (rsize > 0)
1906                                 ++sge;
1907                 }
1908         }
1909
1910         return 0;
1911
1912 free_mem:
             /* Free the ib_sge arrays allocated so far (entries 0..n_rdma-1). */
1913         while (ioctx->n_rdma)
1914                 kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge);
1915
1916         kfree(ioctx->rdma_ius);
1917
1918         dma_unmap_sg(ch->sport->sdev->device->dma_device,
1919                      scat, scst_cmd_get_sg_cnt(scmnd),
1920                      scst_to_tgt_dma_dir(dir));
1921
1922         return -ENOMEM;
1923 }
1924
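     /*
      * Post one RDMA work request per rdma_iu on the channel's queue pair.
      * The opcode depends on the data direction: RDMA WRITE pushes read
      * data to the initiator and RDMA READ pulls write data from it.
      */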
1925 static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
1926                               scst_data_direction dir)
1927 {
1928         struct ib_send_wr wr;
1929         struct ib_send_wr *bad_wr;
1930         struct rdma_iu *riu;
1931         int i;
1932         int ret = 0;
1933
1934         riu = ioctx->rdma_ius;
1935         memset(&wr, 0, sizeof wr);
1936
1937         for (i = 0; i < ioctx->n_rdma; ++i, ++riu) {
1938                 wr.opcode = (dir == SCST_DATA_READ) ?
1939                     IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
1940                 wr.next = NULL;
1941                 wr.wr_id = ioctx->index;
1942                 wr.wr.rdma.remote_addr = riu->raddr;
1943                 wr.wr.rdma.rkey = riu->rkey;
1944                 wr.num_sge = riu->sge_cnt;
1945                 wr.sg_list = riu->sge;
1946
1947                 /* Request a completion only for the last WR of an RDMA read sequence. */
1948                 if (i == (ioctx->n_rdma - 1) && dir == SCST_DATA_WRITE)
1949                         wr.send_flags = IB_SEND_SIGNALED;
1950
1951                 ret = ib_post_send(ch->qp, &wr, &bad_wr);
1952                 if (ret)
1953                         break;
1954         }
1955
1956         return ret;
1957 }
1958
1959 /*
1960  * Start a data transfer between initiator and target. Must not block.
1961  */
1962 static int srpt_xfer_data(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
1963                           struct scst_cmd *scmnd)
1964 {
1965         int ret;
1966
1967         ret = srpt_map_sg_to_ib_sge(ch, ioctx, scmnd);
1968         if (ret) {
1969                 printk(KERN_ERR PFX "%s[%d] ret=%d\n", __func__, __LINE__, ret);
1970                 ret = SCST_TGT_RES_QUEUE_FULL;
1971                 goto out;
1972         }
1973
1974         ret = srpt_perform_rdmas(ch, ioctx, scst_cmd_get_data_direction(scmnd));
1975         if (ret) {
1976                 printk(KERN_ERR PFX "%s[%d] ret=%d\n", __func__, __LINE__, ret);
1977                 if (ret == -EAGAIN || ret == -ENOMEM)
1978                         ret = SCST_TGT_RES_QUEUE_FULL;
1979                 else
1980                         ret = SCST_TGT_RES_FATAL_ERROR;
1981                 goto out;
1982         }
1983
1984         ret = SCST_TGT_RES_SUCCESS;
1985
1986 out:
1987         return ret;
1988 }
1989
1990 /*
1991  * Called by the SCST core to inform ib_srpt that data reception should start.
1992  * Must not block.
1993  */
1994 static int srpt_rdy_to_xfer(struct scst_cmd *scmnd)
1995 {
1996         struct srpt_rdma_ch *ch;
1997         struct srpt_ioctx *ioctx;
1998
1999         ioctx = scst_cmd_get_tgt_priv(scmnd);
2000         BUG_ON(!ioctx);
2001
2002         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2003         BUG_ON(!ch);
2004
2005         if (ch->state == RDMA_CHANNEL_DISCONNECTING)
2006                 return SCST_TGT_RES_FATAL_ERROR;
2007         else if (ch->state == RDMA_CHANNEL_CONNECTING)
2008                 return SCST_TGT_RES_QUEUE_FULL;
2009
2010         return srpt_xfer_data(ch, ioctx, scmnd);
2011 }
2012
2013 /*
2014  * Called by the SCST core. Transmits the response buffer and status held in
2015  * 'scmnd'. Must not block.
2016  */
2017 static int srpt_xmit_response(struct scst_cmd *scmnd)
2018 {
2019         struct srpt_rdma_ch *ch;
2020         struct srpt_ioctx *ioctx;
2021         struct srp_rsp *srp_rsp;
2022         u64 tag;
2023         int ret = SCST_TGT_RES_SUCCESS;
2024         int dir;
2025         int status;
2026
2027         ioctx = scst_cmd_get_tgt_priv(scmnd);
2028         BUG_ON(!ioctx);
2029
2030         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2031         BUG_ON(!ch);
2032
2033         tag = scst_cmd_get_tag(scmnd);
2034
2035         if (ch->state != RDMA_CHANNEL_LIVE) {
2036                 printk(KERN_ERR PFX
2037                        "%s: tag= %lld channel in bad state %d\n",
2038                        __func__, (unsigned long long)tag, ch->state);
2039
2040                 if (ch->state == RDMA_CHANNEL_DISCONNECTING)
2041                         ret = SCST_TGT_RES_FATAL_ERROR;
2042                 else if (ch->state == RDMA_CHANNEL_CONNECTING)
2043                         ret = SCST_TGT_RES_QUEUE_FULL;
2044
2045                 if (unlikely(scst_cmd_aborted(scmnd)))
2046                         goto out_aborted;
2047
2048                 goto out;
2049         }
2050
2051         dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
2052                                 MAX_MESSAGE_SIZE, DMA_TO_DEVICE);
2053
2054         srp_rsp = ioctx->buf;
2055
2056         if (unlikely(scst_cmd_aborted(scmnd))) {
2057                 printk(KERN_ERR PFX
2058                "%s: tag= %lld has already been aborted\n",
2059                        __func__, (unsigned long long)tag);
2060                 goto out_aborted;
2061         }
2062
2063         dir = scst_cmd_get_data_direction(scmnd);
2064         status = scst_cmd_get_status(scmnd) & 0xff;
2065
2066         srpt_build_cmd_rsp(ch, ioctx, NO_SENSE, NO_ADD_SENSE, tag);
2067
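             /*
              * If SCST supplies valid sense data, copy it into the area that
              * follows the fixed-size SRP_RSP header, truncate it to the room
              * left in the message buffer and flag its presence via
              * SRP_RSP_FLAG_SNSVALID.
              */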
2068         if (SCST_SENSE_VALID(scst_cmd_get_sense_buffer(scmnd))) {
2069                 srp_rsp->sense_data_len = scst_cmd_get_sense_buffer_len(scmnd);
2070                 if (srp_rsp->sense_data_len >
2071                     (MAX_MESSAGE_SIZE - sizeof *srp_rsp))
2072                         srp_rsp->sense_data_len =
2073                             MAX_MESSAGE_SIZE - sizeof *srp_rsp;
2074
2075                 memcpy((u8 *) (srp_rsp + 1), scst_cmd_get_sense_buffer(scmnd),
2076                        srp_rsp->sense_data_len);
2077
2078                 srp_rsp->sense_data_len = cpu_to_be32(srp_rsp->sense_data_len);
2079                 srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
2080
2081                 if (!status)
2082                         status = SAM_STAT_CHECK_CONDITION;
2083         }
2084
2085         srp_rsp->status = status;
2086
2087         /* transfer read data if any */
2088         if (dir == SCST_DATA_READ && scst_cmd_get_resp_data_len(scmnd)) {
2089                 ret = srpt_xfer_data(ch, ioctx, scmnd);
2090                 if (ret != SCST_TGT_RES_SUCCESS) {
2091                         printk(KERN_ERR PFX
2092                                "%s: tag= %lld xfer_data failed\n",
2093                                __func__, (unsigned long long)tag);
2094                         goto out;
2095                 }
2096         }
2097
2098         if (srpt_post_send(ch, ioctx,
2099                            sizeof *srp_rsp +
2100                            be32_to_cpu(srp_rsp->sense_data_len))) {
2101                 printk(KERN_ERR PFX "%s: ch->state= %d tag= %lld\n",
2102                        __func__, ch->state,
2103                        (unsigned long long)tag);
2104                 ret = SCST_TGT_RES_FATAL_ERROR;
2105         }
2106
2107 out:
2108         return ret;
2109
2110 out_aborted:
2111         ret = SCST_TGT_RES_SUCCESS;
2112         scst_set_delivery_status(scmnd, SCST_CMD_DELIVERY_ABORTED);
2113         scst_tgt_cmd_done(scmnd, SCST_CONTEXT_SAME);
2114         goto out;
2115 }
2116
2117 /*
2118  * Called by the SCST core to inform ib_srpt that a received task management
2119  * function has been completed. Must not block.
2120  */
2121 static void srpt_tsk_mgmt_done(struct scst_mgmt_cmd *mcmnd)
2122 {
2123         struct srpt_rdma_ch *ch;
2124         struct srpt_mgmt_ioctx *mgmt_ioctx;
2125         struct srpt_ioctx *ioctx;
2126
2127         mgmt_ioctx = scst_mgmt_cmd_get_tgt_priv(mcmnd);
2128         BUG_ON(!mgmt_ioctx);
2129
2130         ch = mgmt_ioctx->ch;
2131         BUG_ON(!ch);
2132
2133         ioctx = mgmt_ioctx->ioctx;
2134         BUG_ON(!ioctx);
2135
2136         printk(KERN_WARNING PFX
2137                "%s: tsk_mgmt_done for tag= %lld status=%d\n",
2138                __func__, (unsigned long long)mgmt_ioctx->tag,
2139                scst_mgmt_cmd_get_status(mcmnd));
2140
2141         srpt_build_tskmgmt_rsp(ch, ioctx,
2142                                (scst_mgmt_cmd_get_status(mcmnd) ==
2143                                 SCST_MGMT_STATUS_SUCCESS) ?
2144                                SRP_TSK_MGMT_SUCCESS : SRP_TSK_MGMT_FAILED,
2145                                mgmt_ioctx->tag);
2146         srpt_post_send(ch, ioctx, sizeof(struct srp_rsp) + 4);
2147
2148         scst_mgmt_cmd_set_tgt_priv(mcmnd, NULL);
2149
2150         kfree(mgmt_ioctx);
2151 }
2152
2153 /*
2154  * Called by the SCST core to inform ib_srpt that the command 'scmnd' is about
2155  * to be freed. May be called in IRQ context.
2156  */
2157 static void srpt_on_free_cmd(struct scst_cmd *scmnd)
2158 {
2159         struct srpt_rdma_ch *ch;
2160         struct srpt_ioctx *ioctx;
2161
2162         ioctx = scst_cmd_get_tgt_priv(scmnd);
2163         BUG_ON(!ioctx);
2164
2165         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2166         BUG_ON(!ch);
2167
2168         spin_lock_irq(&ch->spinlock);
2169         list_del(&ioctx->scmnd_list);
2170         ch->active_scmnd_cnt--;
2171         spin_unlock_irq(&ch->spinlock);
2172
2173         srpt_reset_ioctx(ch, ioctx);
2174         scst_cmd_set_tgt_priv(scmnd, NULL);
2175 }
2176
2177 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2178 /* A vanilla 2.6.19 or older kernel without backported OFED kernel headers. */
2179 static void srpt_refresh_port_work(void *ctx)
2180 #else
2181 static void srpt_refresh_port_work(struct work_struct *work)
2182 #endif
2183 {
2184 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2185         struct srpt_port *sport = (struct srpt_port *)ctx;
2186 #else
2187         struct srpt_port *sport = container_of(work, struct srpt_port, work);
2188 #endif
2189
2190         srpt_refresh_port(sport);
2191 }
2192
2193 /*
2194  * Called by the SCST core to detect target adapters. Returns the number of
2195  * detected target adapters.
2196  */
2197 static int srpt_detect(struct scst_tgt_template *tp)
2198 {
2199         struct srpt_device *sdev;
2200         int count = 0;
2201
2202         TRACE_ENTRY();
2203
2204         list_for_each_entry(sdev, &srpt_devices, list)
2205                 ++count;
2206
2207         TRACE_EXIT();
2208
2209         return count;
2210 }
2211
2212 /*
2213  * Callback function called by the SCST core from scst_unregister() to free up
2214  * the resources associated with device scst_tgt.
2215  */
2216 static int srpt_release(struct scst_tgt *scst_tgt)
2217 {
2218         struct srpt_device *sdev = scst_tgt_get_tgt_priv(scst_tgt);
2219         struct srpt_rdma_ch *ch, *tmp_ch;
2220
2221         TRACE_ENTRY();
2222
2223         BUG_ON(!scst_tgt);
2224 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
2225         WARN_ON(!sdev);
2226         if (!sdev)
2227                 return -ENODEV;
2228 #else
2229         if (WARN_ON(!sdev))
2230                 return -ENODEV;
2231 #endif
2232
2233         srpt_unregister_procfs_entry(scst_tgt->tgtt);
2234
2235         list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list)
2236             srpt_release_channel(ch, 1);
2237
2238         srpt_unregister_mad_agent(sdev);
2239
2240         scst_tgt_set_tgt_priv(scst_tgt, NULL);
2241
2242         TRACE_EXIT();
2243
2244         return 0;
2245 }
2246
2247 /*
2248  * Entry point for ib_srpt's kernel thread. This kernel thread is only created
2249  * when the module parameter 'thread' is not zero (the default is zero).
2250  * This thread processes the ioctx list srpt_thread.thread_ioctx_list.
2251  *
2252  * @pre thread != 0
2253  */
2254 static int srpt_ioctx_thread(void *arg)
2255 {
2256         struct srpt_ioctx *ioctx;
2257
2258         /* Hibernation / freezing of the SRPT kernel thread is not supported. */
2259         current->flags |= PF_NOFREEZE;
2260
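             /*
              * The loop below open-codes an interruptible wait: sleep on
              * ioctx_list_waitQ until srpt_test_ioctx_list() reports pending
              * work, dropping thread_lock around each schedule() call, and
              * then process the queued completions.
              */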
2261         spin_lock_irq(&srpt_thread.thread_lock);
2262         while (!kthread_should_stop()) {
2263                 wait_queue_t wait;
2264                 init_waitqueue_entry(&wait, current);
2265
2266                 if (!srpt_test_ioctx_list()) {
2267                         add_wait_queue_exclusive(&ioctx_list_waitQ, &wait);
2268
2269                         for (;;) {
2270                                 set_current_state(TASK_INTERRUPTIBLE);
2271                                 if (srpt_test_ioctx_list())
2272                                         break;
2273                                 spin_unlock_irq(&srpt_thread.thread_lock);
2274                                 schedule();
2275                                 spin_lock_irq(&srpt_thread.thread_lock);
2276                         }
2277                         set_current_state(TASK_RUNNING);
2278                         remove_wait_queue(&ioctx_list_waitQ, &wait);
2279                 }
2280
2281                 while (!list_empty(&srpt_thread.thread_ioctx_list)) {
2282                         ioctx = list_entry(srpt_thread.thread_ioctx_list.next,
2283                                            struct srpt_ioctx, comp_list);
2284
2285                         list_del(&ioctx->comp_list);
2286
2287                         spin_unlock_irq(&srpt_thread.thread_lock);
2288                         switch (ioctx->op) {
2289                         case IB_WC_SEND:
2290                                 srpt_handle_send_comp(ioctx->ch, ioctx,
2291                                         SCST_CONTEXT_DIRECT);
2292                                 break;
2293                         case IB_WC_RDMA_WRITE:
2294                         case IB_WC_RDMA_READ:
2295                                 srpt_handle_rdma_comp(ioctx->ch, ioctx);
2296                                 break;
2297                         case IB_WC_RECV:
2298                                 srpt_handle_new_iu(ioctx->ch, ioctx);
2299                                 break;
2300                         default:
2301                                 break;
2302                         }
2303                         spin_lock_irq(&srpt_thread.thread_lock);
2304                 }
2305         }
2306         spin_unlock_irq(&srpt_thread.thread_lock);
2307
2308         return 0;
2309 }
2310
2311 /* SCST target template for the SRP target implementation. */
2312 static struct scst_tgt_template srpt_template = {
2313         .name = DRV_NAME,
2314         .sg_tablesize = SRPT_DEF_SG_TABLESIZE,
2315         .xmit_response_atomic = 1,
2316         .rdy_to_xfer_atomic = 1,
2317         .no_proc_entry = 0,
2318         .detect = srpt_detect,
2319         .release = srpt_release,
2320         .xmit_response = srpt_xmit_response,
2321         .rdy_to_xfer = srpt_rdy_to_xfer,
2322         .on_free_cmd = srpt_on_free_cmd,
2323         .task_mgmt_fn_done = srpt_tsk_mgmt_done
2324 };
2325
2326 /*
2327  * The callback function srpt_release_class_dev() is called whenever a
2328  * device is removed from the /sys/class/infiniband_srpt device class.
2329  */
2330 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2331 static void srpt_release_class_dev(struct class_device *class_dev)
2332 #else
2333 static void srpt_release_class_dev(struct device *dev)
2334 #endif
2335 {
2336 }
2337
2338 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
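     /*
      * Handlers for the "trace_level" procfs entry: the show method prints
      * the currently enabled trace flags and the write method updates them
      * via scst_proc_log_entry_write(), which can also restore
      * DEFAULT_SRPT_TRACE_FLAGS.
      */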
2339 static int srpt_trace_level_show(struct seq_file *seq, void *v)
2340 {
2341         return scst_proc_log_entry_read(seq, trace_flag, NULL);
2342 }
2343
2344 static ssize_t srpt_proc_trace_level_write(struct file *file,
2345         const char __user *buf, size_t length, loff_t *off)
2346 {
2347         return scst_proc_log_entry_write(file, buf, length, &trace_flag,
2348                 DEFAULT_SRPT_TRACE_FLAGS, NULL);
2349 }
2350
2351 static struct scst_proc_data srpt_log_proc_data = {
2352         SCST_DEF_RW_SEQ_OP(srpt_proc_trace_level_write)
2353         .show = srpt_trace_level_show,
2354 };
2355 #endif
2356
2357 static struct class_attribute srpt_class_attrs[] = {
2358         __ATTR_NULL,
2359 };
2360
2361 static struct class srpt_class = {
2362         .name = "infiniband_srpt",
2363 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2364         .release = srpt_release_class_dev,
2365 #else
2366         .dev_release = srpt_release_class_dev,
2367 #endif
2368         .class_attrs = srpt_class_attrs,
2369 };
2370
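     /*
      * sysfs 'login_info' attribute: prints, for each port of the device,
      * the parameters an SRP initiator needs in order to log in to this
      * target. Example output (one line per port; the values shown are
      * illustrative and wrapped here for readability):
      *
      *   tid_ext=0002c90300a1b2c4,ioc_guid=0002c90300a1b2c4,pkey=ffff,
      *   dgid=fe800000000000000002c90300a1b2c5,service_id=0002c90300a1b2c4
      */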
2371 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2372 static ssize_t show_login_info(struct class_device *class_dev, char *buf)
2373 #else
2374 static ssize_t show_login_info(struct device *dev,
2375                                struct device_attribute *attr, char *buf)
2376 #endif
2377 {
2378         struct srpt_device *sdev =
2379 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2380                 container_of(class_dev, struct srpt_device, class_dev);
2381 #else
2382                 container_of(dev, struct srpt_device, dev);
2383 #endif
2384         struct srpt_port *sport;
2385         int i;
2386         int len = 0;
2387
2388         for (i = 0; i < sdev->device->phys_port_cnt; i++) {
2389                 sport = &sdev->port[i];
2390
2391                 len += sprintf(buf + len,
2392                                "tid_ext=%016llx,ioc_guid=%016llx,pkey=ffff,"
2393                                "dgid=%04x%04x%04x%04x%04x%04x%04x%04x,"
2394                                "service_id=%016llx\n",
2395                                (unsigned long long) mellanox_ioc_guid,
2396                                (unsigned long long) mellanox_ioc_guid,
2397                                be16_to_cpu(((__be16 *) sport->gid.raw)[0]),
2398                                be16_to_cpu(((__be16 *) sport->gid.raw)[1]),
2399                                be16_to_cpu(((__be16 *) sport->gid.raw)[2]),
2400                                be16_to_cpu(((__be16 *) sport->gid.raw)[3]),
2401                                be16_to_cpu(((__be16 *) sport->gid.raw)[4]),
2402                                be16_to_cpu(((__be16 *) sport->gid.raw)[5]),
2403                                be16_to_cpu(((__be16 *) sport->gid.raw)[6]),
2404                                be16_to_cpu(((__be16 *) sport->gid.raw)[7]),
2405                                (unsigned long long) mellanox_ioc_guid);
2406         }
2407
2408         return len;
2409 }
2410
2411 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2412 static CLASS_DEVICE_ATTR(login_info, S_IRUGO, show_login_info, NULL);
2413 #else
2414 static DEVICE_ATTR(login_info, S_IRUGO, show_login_info, NULL);
2415 #endif
2416
2417 /*
2418  * Callback function called by the InfiniBand core when either an InfiniBand
2419  * device has been added or during the ib_register_client() call for each
2420  * registered InfiniBand device.
2421  */
2422 static void srpt_add_one(struct ib_device *device)
2423 {
2424         struct srpt_device *sdev;
2425         struct srpt_port *sport;
2426         struct ib_srq_init_attr srq_attr;
2427         int i;
2428
2429         TRACE_ENTRY();
2430
2431         sdev = kzalloc(sizeof *sdev, GFP_KERNEL);
2432         if (!sdev)
2433                 return;
2434
2435         sdev->device = device;
2436
2437 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2438         sdev->class_dev.class = &srpt_class;
2439         sdev->class_dev.dev = device->dma_device;
2440         snprintf(sdev->class_dev.class_id, BUS_ID_SIZE,
2441                  "srpt-%s", device->name);
2442 #else
2443         sdev->dev.class = &srpt_class;
2444         sdev->dev.parent = device->dma_device;
2445 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30)
2446         snprintf(sdev->dev.bus_id, BUS_ID_SIZE, "srpt-%s", device->name);
2447 #else
2448         snprintf(sdev->init_name, sizeof(sdev->init_name),
2449                  "srpt-%s", device->name);
2450         sdev->dev.init_name = sdev->init_name;
2451 #endif
2452 #endif
2453
2454 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2455         if (class_device_register(&sdev->class_dev))
2456                 goto free_dev;
2457         if (class_device_create_file(&sdev->class_dev,
2458                                      &class_device_attr_login_info))
2459                 goto err_dev;
2460 #else
2461         if (device_register(&sdev->dev))
2462                 goto free_dev;
2463         if (device_create_file(&sdev->dev, &dev_attr_login_info))
2464                 goto err_dev;
2465 #endif
2466
2467         if (ib_query_device(device, &sdev->dev_attr))
2468                 goto err_dev;
2469
2470         sdev->pd = ib_alloc_pd(device);
2471         if (IS_ERR(sdev->pd))
2472                 goto err_dev;
2473
2474         sdev->mr = ib_get_dma_mr(sdev->pd, IB_ACCESS_LOCAL_WRITE);
2475         if (IS_ERR(sdev->mr))
2476                 goto err_pd;
2477
2478         srq_attr.event_handler = srpt_srq_event;
2479         srq_attr.srq_context = (void *)sdev;
2480         srq_attr.attr.max_wr = min(SRPT_SRQ_SIZE, sdev->dev_attr.max_srq_wr);
2481         srq_attr.attr.max_sge = 1;
2482         srq_attr.attr.srq_limit = 0;
2483
2484         sdev->srq = ib_create_srq(sdev->pd, &srq_attr);
2485         if (IS_ERR(sdev->srq))
2486                 goto err_mr;
2487
2488         printk(KERN_DEBUG PFX "%s: created SRQ with %d work requests"
2489                " (device max %d) for device %s\n", __func__,
2490                srq_attr.attr.max_wr, sdev->dev_attr.max_srq_wr, device->name);
2491
2492         if (!mellanox_ioc_guid)
2493                 mellanox_ioc_guid = be64_to_cpu(device->node_guid);
2494
2495         sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev);
2496         if (IS_ERR(sdev->cm_id))
2497                 goto err_srq;
2498
2499         /* print out target login information */
2500         printk(KERN_DEBUG PFX "Target login info: id_ext=%016llx,"
2501                 "ioc_guid=%016llx,pkey=ffff,service_id=%016llx\n",
2502                 (unsigned long long) mellanox_ioc_guid,
2503                 (unsigned long long) mellanox_ioc_guid,
2504                 (unsigned long long) mellanox_ioc_guid);
2505
2506         /*
2507          * We do not have a consistent service_id (i.e. also the id_ext of the
2508          * target_id) to identify this target. We currently use the GUID of the
2509          * first HCA in the system as the service_id; as a result the target_id
2510          * will change if that HCA goes bad and is replaced by a different one.
2511          */
2512         if (ib_cm_listen(sdev->cm_id, cpu_to_be64(mellanox_ioc_guid), 0, NULL))
2513                 goto err_cm;
2514
2515         INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device,
2516                               srpt_event_handler);
2517         if (ib_register_event_handler(&sdev->event_handler))
2518                 goto err_cm;
2519
2520         if (srpt_alloc_ioctx_ring(sdev))
2521                 goto err_event;
2522
2523         INIT_LIST_HEAD(&sdev->rch_list);
2524         spin_lock_init(&sdev->spinlock);
2525
2526         for (i = 0; i < SRPT_SRQ_SIZE; ++i)
2527                 srpt_post_recv(sdev, sdev->ioctx_ring[i]);
2528
2529         list_add_tail(&sdev->list, &srpt_devices);
2530
2531         ib_set_client_data(device, &srpt_client, sdev);
2532
2533         sdev->scst_tgt = scst_register(&srpt_template, NULL);
2534         if (!sdev->scst_tgt) {
2535                 printk(KERN_ERR PFX "SCST registration failed for %s.\n",
2536                         sdev->device->name);
2537                 goto err_ring;
2538         }
2539
2540         scst_tgt_set_tgt_priv(sdev->scst_tgt, sdev);
2541
2542         for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
2543                 sport = &sdev->port[i - 1];
2544                 sport->sdev = sdev;
2545                 sport->port = i;
2546 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2547                 /*
2548                  * A vanilla 2.6.19 or older kernel without backported OFED
2549                  * kernel headers.
2550                  */
2551                 INIT_WORK(&sport->work, srpt_refresh_port_work, sport);
2552 #else
2553                 INIT_WORK(&sport->work, srpt_refresh_port_work);
2554 #endif
2555                 if (srpt_refresh_port(sport)) {
2556                         printk(KERN_ERR PFX "MAD registration failed"
2557                                " for %s-%d.\n", sdev->device->name, i);
2558                         goto err_refresh_port;
2559                 }
2560         }
2561
2562         TRACE_EXIT();
2563
2564         return;
2565
2566 err_refresh_port:
2567         scst_unregister(sdev->scst_tgt);
2568 err_ring:
2569         ib_set_client_data(device, &srpt_client, NULL);
2570         list_del(&sdev->list);
2571         srpt_free_ioctx_ring(sdev);
2572 err_event:
2573         ib_unregister_event_handler(&sdev->event_handler);
2574 err_cm:
2575         ib_destroy_cm_id(sdev->cm_id);
2576 err_srq:
2577         ib_destroy_srq(sdev->srq);
2578 err_mr:
2579         ib_dereg_mr(sdev->mr);
2580 err_pd:
2581         ib_dealloc_pd(sdev->pd);
2582 err_dev:
2583 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2584         class_device_unregister(&sdev->class_dev);
2585 #else
2586         device_unregister(&sdev->dev);
2587 #endif
2588 free_dev:
2589         kfree(sdev);
2590
2591         TRACE_EXIT();
2592 }
2593
2594 /*
2595  * Callback function called by the InfiniBand core when either an InfiniBand
2596  * device has been removed or during the ib_unregister_client() call for each
2597  * registered InfiniBand device.
2598  */
2599 static void srpt_remove_one(struct ib_device *device)
2600 {
2601         int i;
2602         struct srpt_device *sdev;
2603
2604         TRACE_ENTRY();
2605
2606         sdev = ib_get_client_data(device, &srpt_client);
2607 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
2608         WARN_ON(!sdev);
2609         if (!sdev)
2610                 return;
2611 #else
2612         if (WARN_ON(!sdev))
2613                 return;
2614 #endif
2615
2616         /*
2617          * Cancel the work if it is queued. Wait until srpt_refresh_port_work()
2618          * finished if it is running.
2619          */
2620         for (i = 0; i < sdev->device->phys_port_cnt; i++)
2621                 cancel_work_sync(&sdev->port[i].work);
2622
2623         scst_unregister(sdev->scst_tgt);
2624         sdev->scst_tgt = NULL;
2625
2626         ib_unregister_event_handler(&sdev->event_handler);
2627         ib_destroy_cm_id(sdev->cm_id);
2628         ib_destroy_srq(sdev->srq);
2629         ib_dereg_mr(sdev->mr);
2630         ib_dealloc_pd(sdev->pd);
2631 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2632         class_device_unregister(&sdev->class_dev);
2633 #else
2634         device_unregister(&sdev->dev);
2635 #endif
2636
2637         srpt_free_ioctx_ring(sdev);
2638         list_del(&sdev->list);
2639         kfree(sdev);
2640
2641         TRACE_EXIT();
2642 }
2643
2644 /**
2645  * Create procfs entries for srpt. Currently the only procfs entry created
2646  * by this function is the "trace_level" entry.
2647  */
2648 static int srpt_register_procfs_entry(struct scst_tgt_template *tgt)
2649 {
2650         int res = 0;
2651 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
2652         struct proc_dir_entry *p, *root;
2653
2654         root = scst_proc_get_tgt_root(tgt);
2655         WARN_ON(!root);
2656         if (root) {
2657                 /*
2658                  * Fill in the scst_proc_data::data pointer, which is used in
2659                  * a printk(KERN_INFO ...) statement in
2660                  * scst_proc_log_entry_write() in scst_proc.c.
2661                  */
2662                 srpt_log_proc_data.data = (char *)tgt->name;
2663                 p = scst_create_proc_entry(root, SRPT_PROC_TRACE_LEVEL_NAME,
2664                                            &srpt_log_proc_data);
2665                 if (!p)
2666                         res = -ENOMEM;
2667         } else
2668                 res = -ENOMEM;
2669
2670 #endif
2671         return res;
2672 }
2673
2674 static void srpt_unregister_procfs_entry(struct scst_tgt_template *tgt)
2675 {
2676 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
2677         struct proc_dir_entry *root;
2678
2679         root = scst_proc_get_tgt_root(tgt);
2680         WARN_ON(!root);
2681         if (root)
2682                 remove_proc_entry(SRPT_PROC_TRACE_LEVEL_NAME, root);
2683 #endif
2684 }
2685
2686 /*
2687  * Module initialization.
2688  *
2689  * Note: since ib_register_client() registers callback functions, and since at
2690  * least one of these callback functions (srpt_add_one()) calls SCST functions,
2691  * the SCST target template must be registered before ib_register_client() is
2692  * called.
2693  */
2694 static int __init srpt_init_module(void)
2695 {
2696         int ret;
2697
2698         INIT_LIST_HEAD(&srpt_devices);
2699
2700         ret = class_register(&srpt_class);
2701         if (ret) {
2702                 printk(KERN_ERR PFX "couldn't register class ib_srpt\n");
2703                 goto out;
2704         }
2705
2706         ret = scst_register_target_template(&srpt_template);
2707         if (ret < 0) {
2708                 printk(KERN_ERR PFX "couldn't register with scst\n");
2709                 ret = -ENODEV;
2710                 goto out_unregister_class;
2711         }
2712
2713         ret = srpt_register_procfs_entry(&srpt_template);
2714         if (ret) {
2715                 printk(KERN_ERR PFX "couldn't register procfs entry\n");
2716                 goto out_unregister_target;
2717         }
2718
2719         ret = ib_register_client(&srpt_client);
2720         if (ret) {
2721                 printk(KERN_ERR PFX "couldn't register IB client\n");
2722                 goto out_unregister_target;
2723         }
2724
2725         if (thread) {
2726                 spin_lock_init(&srpt_thread.thread_lock);
2727                 INIT_LIST_HEAD(&srpt_thread.thread_ioctx_list);
2728                 srpt_thread.thread = kthread_run(srpt_ioctx_thread,
2729                                                  NULL, "srpt_thread");
2730                 if (IS_ERR(srpt_thread.thread)) {
2731                         srpt_thread.thread = NULL;
2732                         thread = 0;
2733                 }
2734         }
2735
2736         return 0;
2737
2738 out_unregister_target:
2739         /*
2740          * Note: the procfs entry is unregistered in srpt_release(), which is
2741          * called by scst_unregister_target_template().
2742          */
2743         scst_unregister_target_template(&srpt_template);
2744 out_unregister_class:
2745         class_unregister(&srpt_class);
2746 out:
2747         return ret;
2748 }
2749
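     /*
      * Module cleanup: stop the optional I/O context processing thread and
      * unregister everything in the reverse order of srpt_init_module().
      */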
2750 static void __exit srpt_cleanup_module(void)
2751 {
2752         TRACE_ENTRY();
2753
2754         if (srpt_thread.thread)
2755                 kthread_stop(srpt_thread.thread);
2756         ib_unregister_client(&srpt_client);
2757         scst_unregister_target_template(&srpt_template);
2758         class_unregister(&srpt_class);
2759
2760         TRACE_EXIT();
2761 }
2762
2763 module_init(srpt_init_module);
2764 module_exit(srpt_cleanup_module);