- Enabled the LUN reset task management function.
1 /*
2  * Copyright (c) 2006 - 2009 Mellanox Technology Inc.  All rights reserved.
3  * Copyright (C) 2008 Vladislav Bolkhovitin <vst@vlnb.net>
4  * Copyright (C) 2008 - 2009 Bart Van Assche <bart.vanassche@gmail.com>
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  *
34  */
35
36 #include <linux/module.h>
37 #include <linux/init.h>
38 #include <linux/slab.h>
39 #include <linux/err.h>
40 #include <linux/ctype.h>
41 #include <linux/string.h>
42 #include <linux/kthread.h>
43 #include <asm/atomic.h>
44 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
45 #include <linux/proc_fs.h>
46 #include <linux/seq_file.h>
47 #endif
48 #include "ib_srpt.h"
49 #include "scst_debug.h"
50
51 /* Name of this kernel module. */
52 #define DRV_NAME                "ib_srpt"
53 /* Prefix for printk() kernel messages. */
54 #define PFX                     DRV_NAME ": "
55 #define DRV_VERSION             "1.0.1"
56 #define DRV_RELDATE             "July 10, 2008"
57 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
58 /* Flags to be used in SCST debug tracing statements. */
59 #define DEFAULT_SRPT_TRACE_FLAGS (TRACE_OUT_OF_MEM | TRACE_MINOR \
60                                   | TRACE_MGMT | TRACE_SPECIAL)
61 /* Name of the entry that will be created under /proc/scsi_tgt/ib_srpt. */
62 #define SRPT_PROC_TRACE_LEVEL_NAME      "trace_level"
63 #endif
64
65 #define MELLANOX_SRPT_ID_STRING "Mellanox OFED SRP target"
66
67 MODULE_AUTHOR("Vu Pham");
68 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol target "
69                    "v" DRV_VERSION " (" DRV_RELDATE ")");
70 MODULE_LICENSE("Dual BSD/GPL");
71
72 struct srpt_thread {
73         /* Protects thread_ioctx_list. */
74         spinlock_t thread_lock;
75         /* I/O contexts to be processed by the kernel thread. */
76         struct list_head thread_ioctx_list;
77         /* SRPT kernel thread. */
78         struct task_struct *thread;
79 };
80
81 /*
82  * Global Variables
83  */
84
85 static u64 mellanox_ioc_guid;
86 /* List of srpt_device structures. */
87 static struct list_head srpt_devices;
88 static int thread;
89 static struct srpt_thread srpt_thread;
90 static DECLARE_WAIT_QUEUE_HEAD(ioctx_list_waitQ);
91 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
92 static unsigned long trace_flag = DEFAULT_SRPT_TRACE_FLAGS;
93 module_param(trace_flag, long, 0644);
94 MODULE_PARM_DESC(trace_flag,
95                  "Trace flags for the ib_srpt kernel module.");
96 #endif
97
98 module_param(thread, int, 0444);
99 MODULE_PARM_DESC(thread,
100                  "Execute ioctx in thread context. Default is 0, i.e. soft "
101                  "IRQ, where possible.");
102
103 static void srpt_add_one(struct ib_device *device);
104 static void srpt_remove_one(struct ib_device *device);
105 static int srpt_disconnect_channel(struct srpt_rdma_ch *ch, int dreq);
106 static void srpt_unregister_mad_agent(struct srpt_device *sdev);
107 static void srpt_unregister_procfs_entry(struct scst_tgt_template *tgt);
108
109 static struct ib_client srpt_client = {
110         .name = DRV_NAME,
111         .add = srpt_add_one,
112         .remove = srpt_remove_one
113 };
114
115 /*
116  * Callback function called by the InfiniBand core when an asynchronous IB
117  * event occurs. This callback may occur in interrupt context. See also
118  * section 11.5.2, Set Asynchronous Event Handler in the InfiniBand
119  * Architecture Specification.
120  */
121 static void srpt_event_handler(struct ib_event_handler *handler,
122                                struct ib_event *event)
123 {
124         struct srpt_device *sdev =
125             ib_get_client_data(event->device, &srpt_client);
126         struct srpt_port *sport;
127
128         if (!sdev || sdev->device != event->device)
129                 return;
130
131         TRACE_DBG("ASYNC event= %d on device= %s",
132                   event->event, sdev->device->name);
133
134         switch (event->event) {
135         case IB_EVENT_PORT_ERR:
136                 if (event->element.port_num <= sdev->device->phys_port_cnt) {
137                         sport = &sdev->port[event->element.port_num - 1];
138                         sport->lid = 0;
139                         sport->sm_lid = 0;
140                 }
141                 break;
142         case IB_EVENT_PORT_ACTIVE:
143         case IB_EVENT_LID_CHANGE:
144         case IB_EVENT_PKEY_CHANGE:
145         case IB_EVENT_SM_CHANGE:
146         case IB_EVENT_CLIENT_REREGISTER:
147                 /*
148                  * Refresh port data asynchronously. Note: it is safe to call
149                  * schedule_work() even if &sport->work is already on the
150                  * global workqueue because schedule_work() tests for the
151                  * work_pending() condition before adding &sport->work to the
152                  * global work queue.
153                  */
154                 if (event->element.port_num <= sdev->device->phys_port_cnt) {
155                         sport = &sdev->port[event->element.port_num - 1];
156                         if (!sport->lid && !sport->sm_lid)
157                                 schedule_work(&sport->work);
158                 }
159                 break;
160         default:
161                 break;
162         }
163
164 }
165
166 /*
167  * Callback function called by the InfiniBand core for SRQ (shared receive
168  * queue) events.
169  */
170 static void srpt_srq_event(struct ib_event *event, void *ctx)
171 {
172         TRACE_DBG("SRQ event %d", event->event);
173 }
174
175 /*
176  * Callback function called by the InfiniBand core for QP (queue pair) events.
177  */
178 static void srpt_qp_event(struct ib_event *event, void *ctx)
179 {
180         struct srpt_rdma_ch *ch = ctx;
181
182         TRACE_DBG("QP event %d on cm_id=%p sess_name=%s state=%d",
183                   event->event, ch->cm_id, ch->sess_name, ch->state);
184
185         switch (event->event) {
186         case IB_EVENT_COMM_EST:
187 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20) || defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
188                 ib_cm_notify(ch->cm_id, event->event);
189 #else
190                 /* Vanilla 2.6.19 kernel (or before) without OFED. */
191                 printk(KERN_ERR PFX "ib_cm_notify() is not available on"
192                         " vanilla kernels before 2.6.20\n");
193 #endif
194                 break;
195         case IB_EVENT_QP_LAST_WQE_REACHED:
196                 if (ch->state == RDMA_CHANNEL_LIVE) {
197                         TRACE_DBG("%s", "Schedule CM_DISCONNECT_WORK");
198                         srpt_disconnect_channel(ch, 1);
199                 }
200                 break;
201         default:
202                 break;
203         }
204 }
205
206 /*
207  * Helper function for filling in an InfiniBand IOUnitInfo structure. Copies
208  * the lowest four bits of 'value' into element 'slot' of the array of four-bit
209  * elements called 'c_list' (controller list). The index 'slot' is one-based.
210  *
211  * @pre 1 <= slot && 0 <= value && value < 16
212  */
213 static void srpt_set_ioc(u8 *c_list, u32 slot, u8 value)
214 {
215         u16 id;
216         u8 tmp;
217
218         id = (slot - 1) / 2;
219         if (slot & 0x1) {
220                 tmp = c_list[id] & 0xf;
221                 c_list[id] = (value << 4) | tmp;
222         } else {
223                 tmp = c_list[id] & 0xf0;
224                 c_list[id] = (value & 0xf) | tmp;
225         }
226 }
227
228 /*
229  * Write InfiniBand ClassPortInfo to mad. See also section 16.3.3.1
230  * ClassPortInfo in the InfiniBand Architecture Specification.
231  */
232 static void srpt_get_class_port_info(struct ib_dm_mad *mad)
233 {
234         struct ib_class_port_info *cif;
235
236         cif = (struct ib_class_port_info *)mad->data;
237         memset(cif, 0, sizeof *cif);
238         cif->base_version = 1;
239         cif->class_version = 1;
240         cif->resp_time_value = 20;
241
242         mad->mad_hdr.status = 0;
243 }
244
245 /*
246  * Write IOUnitInfo to mad. See also section 16.3.3.3 IOUnitInfo in the
247  * InfiniBand Architecture Specification. See also section B.7,
248  * table B.6 in the T10 SRP r16a document.
249  */
250 static void srpt_get_iou(struct ib_dm_mad *mad)
251 {
252         struct ib_dm_iou_info *ioui;
253         u8 slot;
254         int i;
255
256         ioui = (struct ib_dm_iou_info *)mad->data;
257         ioui->change_id = 1;
258         ioui->max_controllers = 16;
259
260         /* set present for slot 1 and empty for the rest */
261         srpt_set_ioc(ioui->controller_list, 1, 1);
262         for (i = 1, slot = 2; i < 16; i++, slot++)
263                 srpt_set_ioc(ioui->controller_list, slot, 0);
264
265         mad->mad_hdr.status = 0;
266 }
267
268 /*
269  * Write IOControllerProfile to mad for I/O controller (sdev, slot). See also
270  * section 16.3.3.4 IOControllerProfile in the InfiniBand Architecture
271  * Specification. See also section B.7, table B.7 in the T10 SRP r16a
272  * document.
273  */
274 static void srpt_get_ioc(struct srpt_device *sdev, u32 slot,
275                          struct ib_dm_mad *mad)
276 {
277         struct ib_dm_ioc_profile *iocp;
278
279         iocp = (struct ib_dm_ioc_profile *)mad->data;
280
281         if (!slot || slot > 16) {
282                 mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
283                 return;
284         }
285
286         if (slot > 2) {
287                 mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
288                 return;
289         }
290
291         memset(iocp, 0, sizeof *iocp);
292         strcpy(iocp->id_string, MELLANOX_SRPT_ID_STRING);
293         iocp->guid = cpu_to_be64(mellanox_ioc_guid);
294         iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
295         iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id);
296         iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver);
297         iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
298         iocp->subsys_device_id = 0x0;
299         iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
300         iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
301         iocp->protocol = cpu_to_be16(SRP_PROTOCOL);
302         iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION);
303         iocp->send_queue_depth = cpu_to_be16(SRPT_SRQ_SIZE);
304         iocp->rdma_read_depth = 4;
305         iocp->send_size = cpu_to_be32(MAX_MESSAGE_SIZE);
306         iocp->rdma_size = cpu_to_be32(MAX_RDMA_SIZE);
307         iocp->num_svc_entries = 1;
308         iocp->op_cap_mask = SRP_SEND_TO_IOC | SRP_SEND_FROM_IOC |
309             SRP_RDMA_READ_FROM_IOC | SRP_RDMA_WRITE_FROM_IOC;
310
311         mad->mad_hdr.status = 0;
312 }
313
314 /*
315  * Device management: write ServiceEntries to mad for the given slot. See also
316  * section 16.3.3.5 ServiceEntries in the InfiniBand Architecture
317  * Specification. See also section B.7, table B.8 in the T10 SRP r16a document.
318  */
319 static void srpt_get_svc_entries(u16 slot, u8 hi, u8 lo, struct ib_dm_mad *mad)
320 {
321         struct ib_dm_svc_entries *svc_entries;
322
323         if (!slot || slot > 16) {
324                 mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
325                 return;
326         }
327
328         if (slot > 2 || lo > hi || hi > 1) {
329                 mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
330                 return;
331         }
332
333         svc_entries = (struct ib_dm_svc_entries *)mad->data;
334         memset(svc_entries, 0, sizeof *svc_entries);
335         svc_entries->service_entries[0].id = cpu_to_be64(mellanox_ioc_guid);
336         snprintf(svc_entries->service_entries[0].name,
337                  sizeof(svc_entries->service_entries[0].name),
338                  "%s%016llx",
339                  SRP_SERVICE_NAME_PREFIX,
340                  (unsigned long long)mellanox_ioc_guid);
341
342         mad->mad_hdr.status = 0;
343 }
344
345 /*
346  * Process the MAD *rq_mad received through source port *sp
347  * (MAD = InfiniBand management datagram). The response to be sent back is
348  * written to *rsp_mad.
349  */
350 static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
351                                  struct ib_dm_mad *rsp_mad)
352 {
353         u16 attr_id;
354         u32 slot;
355         u8 hi, lo;
356
357         attr_id = be16_to_cpu(rq_mad->mad_hdr.attr_id);
358         switch (attr_id) {
359         case DM_ATTR_CLASS_PORT_INFO:
360                 srpt_get_class_port_info(rsp_mad);
361                 break;
362         case DM_ATTR_IOU_INFO:
363                 srpt_get_iou(rsp_mad);
364                 break;
365         case DM_ATTR_IOC_PROFILE:
366                 slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
367                 srpt_get_ioc(sp->sdev, slot, rsp_mad);
368                 break;
369         case DM_ATTR_SVC_ENTRIES:
370                 slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
371                 hi = (u8) ((slot >> 8) & 0xff);
372                 lo = (u8) (slot & 0xff);
373                 slot = (u16) ((slot >> 16) & 0xffff);
374                 srpt_get_svc_entries(slot, hi, lo, rsp_mad);
375                 break;
376         default:
377                 rsp_mad->mad_hdr.status =
378                     cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
379                 break;
380         }
381 }
382
383 /*
384  * Callback function that is called by the InfiniBand core after transmission of
385  * a MAD. (MAD = management datagram; AH = address handle.)
386  */
387 static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
388                                   struct ib_mad_send_wc *mad_wc)
389 {
390         ib_destroy_ah(mad_wc->send_buf->ah);
391         ib_free_send_mad(mad_wc->send_buf);
392 }
393
394 /*
395  * Callback function that is called by the InfiniBand core after reception of
396  * a MAD (management datagram).
397  */
398 static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
399                                   struct ib_mad_recv_wc *mad_wc)
400 {
401         struct srpt_port *sport = (struct srpt_port *)mad_agent->context;
402         struct ib_ah *ah;
403         struct ib_mad_send_buf *rsp;
404         struct ib_dm_mad *dm_mad;
405
406         if (!mad_wc || !mad_wc->recv_buf.mad)
407                 return;
408
409         ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc,
410                                   mad_wc->recv_buf.grh, mad_agent->port_num);
411         if (IS_ERR(ah))
412                 goto err;
413
414         BUILD_BUG_ON(offsetof(struct ib_dm_mad, data) != IB_MGMT_DEVICE_HDR);
415
416         rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp,
417                                  mad_wc->wc->pkey_index, 0,
418                                  IB_MGMT_DEVICE_HDR, IB_MGMT_DEVICE_DATA,
419                                  GFP_KERNEL);
420         if (IS_ERR(rsp))
421                 goto err_rsp;
422
423         rsp->ah = ah;
424
425         dm_mad = rsp->mad;
426         memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof *dm_mad);
427         dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
428         dm_mad->mad_hdr.status = 0;
429
430         switch (mad_wc->recv_buf.mad->mad_hdr.method) {
431         case IB_MGMT_METHOD_GET:
432                 srpt_mgmt_method_get(sport, mad_wc->recv_buf.mad, dm_mad);
433                 break;
434         case IB_MGMT_METHOD_SET:
435                 dm_mad->mad_hdr.status =
436                     cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
437                 break;
438         default:
439                 dm_mad->mad_hdr.status =
440                     cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD);
441                 break;
442         }
443
444         if (!ib_post_send_mad(rsp, NULL)) {
445                 ib_free_recv_mad(mad_wc);
446                 /* will destroy_ah & free_send_mad in send completion */
447                 return;
448         }
449
450         ib_free_send_mad(rsp);
451
452 err_rsp:
453         ib_destroy_ah(ah);
454 err:
455         ib_free_recv_mad(mad_wc);
456 }
457
458 /*
459  * Enable InfiniBand management datagram processing, update the cached sm_lid,
460  * lid and gid values, and register a callback function for processing MADs
461  * on the specified port. It is safe to call this function more than once for
462  * the same port.
463  */
464 static int srpt_refresh_port(struct srpt_port *sport)
465 {
466         struct ib_mad_reg_req reg_req;
467         struct ib_port_modify port_modify;
468         struct ib_port_attr port_attr;
469         int ret;
470
471         memset(&port_modify, 0, sizeof port_modify);
472         port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
473         port_modify.clr_port_cap_mask = 0;
474
475         ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
476         if (ret)
477                 goto err_mod_port;
478
479         ret = ib_query_port(sport->sdev->device, sport->port, &port_attr);
480         if (ret)
481                 goto err_query_port;
482
483         sport->sm_lid = port_attr.sm_lid;
484         sport->lid = port_attr.lid;
485
486         ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid);
487         if (ret)
488                 goto err_query_port;
489
490         if (!sport->mad_agent) {
491                 memset(&reg_req, 0, sizeof reg_req);
492                 reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
493                 reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION;
494                 set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
495                 set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);
496
497                 sport->mad_agent = ib_register_mad_agent(sport->sdev->device,
498                                                          sport->port,
499                                                          IB_QPT_GSI,
500                                                          &reg_req, 0,
501                                                          srpt_mad_send_handler,
502                                                          srpt_mad_recv_handler,
503                                                          sport);
504                 if (IS_ERR(sport->mad_agent)) {
505                         ret = PTR_ERR(sport->mad_agent);
506                         sport->mad_agent = NULL;
507                         goto err_query_port;
508                 }
509         }
510
511         return 0;
512
513 err_query_port:
514
515         port_modify.set_port_cap_mask = 0;
516         port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
517         ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
518
519 err_mod_port:
520
521         return ret;
522 }
523
524 /*
525  * Unregister the callback function for processing MADs and disable MAD
526  * processing for all ports of the specified device. It is safe to call this
527  * function more than once for the same device.
528  */
529 static void srpt_unregister_mad_agent(struct srpt_device *sdev)
530 {
531         struct ib_port_modify port_modify = {
532                 .clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP,
533         };
534         struct srpt_port *sport;
535         int i;
536
537         for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
538                 sport = &sdev->port[i - 1];
539                 WARN_ON(sport->port != i);
540                 if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0)
541                         printk(KERN_ERR PFX "disabling MAD processing"
542                                " failed.\n");
543                 if (sport->mad_agent) {
544                         ib_unregister_mad_agent(sport->mad_agent);
545                         sport->mad_agent = NULL;
546                 }
547         }
548 }
549
550 /*
551  * Allocate and initialize an SRPT I/O context structure.
552  */
553 static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev)
554 {
555         struct srpt_ioctx *ioctx;
556
557         ioctx = kmalloc(sizeof *ioctx, GFP_KERNEL);
558         if (!ioctx)
559                 goto out;
560
561         ioctx->buf = kzalloc(MAX_MESSAGE_SIZE, GFP_KERNEL);
562         if (!ioctx->buf)
563                 goto out_free_ioctx;
564
565         ioctx->dma = dma_map_single(sdev->device->dma_device, ioctx->buf,
566                                     MAX_MESSAGE_SIZE, DMA_BIDIRECTIONAL);
567 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)
568         if (dma_mapping_error(sdev->device->dma_device, ioctx->dma))
569 #else
570         if (dma_mapping_error(ioctx->dma))
571 #endif
572                 goto out_free_buf;
573
574         return ioctx;
575
576 out_free_buf:
577         kfree(ioctx->buf);
578 out_free_ioctx:
579         kfree(ioctx);
580 out:
581         return NULL;
582 }
583
584 /*
585  * Deallocate an SRPT I/O context structure.
586  */
587 static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
588 {
589         if (!ioctx)
590                 return;
591
592         dma_unmap_single(sdev->device->dma_device, ioctx->dma,
593                          MAX_MESSAGE_SIZE, DMA_BIDIRECTIONAL);
594         kfree(ioctx->buf);
595         kfree(ioctx);
596 }
597
598 /*
599  * Associate a ring of SRPT I/O context structures with the specified device.
600  */
601 static int srpt_alloc_ioctx_ring(struct srpt_device *sdev)
602 {
603         int i;
604
605         for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
606                 sdev->ioctx_ring[i] = srpt_alloc_ioctx(sdev);
607
608                 if (!sdev->ioctx_ring[i])
609                         goto err;
610
611                 sdev->ioctx_ring[i]->index = i;
612         }
613
614         return 0;
615
616 err:
617         while (--i >= 0) {
618                 srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
619                 sdev->ioctx_ring[i] = NULL;
620         }
621         return -ENOMEM;
622 }
623
624 /* Free the ring of SRPT I/O context structures. */
625 static void srpt_free_ioctx_ring(struct srpt_device *sdev)
626 {
627         int i;
628
629         for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
630                 srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
631                 sdev->ioctx_ring[i] = NULL;
632         }
633 }
634
635 /*
636  * Post a receive request on the SRQ (shared receive queue) of InfiniBand device 'sdev'.
637  */
638 static int srpt_post_recv(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
639 {
640         struct ib_sge list;
641         struct ib_recv_wr wr, *bad_wr;
642
643         wr.wr_id = ioctx->index | SRPT_OP_RECV;
644
645         list.addr = ioctx->dma;
646         list.length = MAX_MESSAGE_SIZE;
647         list.lkey = sdev->mr->lkey;
648
649         wr.next = NULL;
650         wr.sg_list = &list;
651         wr.num_sge = 1;
652
653         return ib_post_srq_recv(sdev->srq, &wr, &bad_wr);
654 }
655
656 /*
657  * Post a send request on the SRPT RDMA channel 'ch'.
658  */
659 static int srpt_post_send(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
660                           int len)
661 {
662         struct ib_sge list;
663         struct ib_send_wr wr, *bad_wr;
664         struct srpt_device *sdev = ch->sport->sdev;
665
666         dma_sync_single_for_device(sdev->device->dma_device, ioctx->dma,
667                                    MAX_MESSAGE_SIZE, DMA_TO_DEVICE);
668
669         list.addr = ioctx->dma;
670         list.length = len;
671         list.lkey = sdev->mr->lkey;
672
673         wr.next = NULL;
674         wr.wr_id = ioctx->index;
675         wr.sg_list = &list;
676         wr.num_sge = 1;
677         wr.opcode = IB_WR_SEND;
678         wr.send_flags = IB_SEND_SIGNALED;
679
680         return ib_post_send(ch->qp, &wr, &bad_wr);
681 }
682
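/*
 * Extract the direct or indirect data buffer descriptors from the SRP_CMD
 * request *srp_cmd and store them in *ioctx. *ind is set to one if the
 * indirect descriptor list is only partially present in the request, a case
 * this driver does not support. Returns zero on success or -ENOMEM if
 * allocating the descriptor array fails.
 */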
683 static int srpt_get_desc_tbl(struct srpt_ioctx *ioctx, struct srp_cmd *srp_cmd,
684                              int *ind)
685 {
686         struct srp_indirect_buf *idb;
687         struct srp_direct_buf *db;
688
689         *ind = 0;
690         if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) ||
691             ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) {
692                 ioctx->n_rbuf = 1;
693                 ioctx->rbufs = &ioctx->single_rbuf;
694
695                 db = (void *)srp_cmd->add_data;
696                 memcpy(ioctx->rbufs, db, sizeof *db);
697                 ioctx->data_len = be32_to_cpu(db->len);
698         } else {
699                 idb = (void *)srp_cmd->add_data;
700
701                 ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof *db;
702
703                 if (ioctx->n_rbuf >
704                     (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
705                         *ind = 1;
706                         ioctx->n_rbuf = 0;
707                         goto out;
708                 }
709
710                 if (ioctx->n_rbuf == 1)
711                         ioctx->rbufs = &ioctx->single_rbuf;
712                 else
713                         ioctx->rbufs =
714                                 kmalloc(ioctx->n_rbuf * sizeof *db, GFP_ATOMIC);
715                 if (!ioctx->rbufs) {
716                         ioctx->n_rbuf = 0;
717                         return -ENOMEM;
718                 }
719
720                 db = idb->desc_list;
721                 memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof *db);
722                 ioctx->data_len = be32_to_cpu(idb->len);
723         }
724 out:
725         return 0;
726 }
727
728 /*
729  * Modify the attributes of queue pair 'qp': allow local write, remote read,
730  * and remote write. Also transition 'qp' to state IB_QPS_INIT.
731  */
732 static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
733 {
734         struct ib_qp_attr *attr;
735         int ret;
736
737         attr = kzalloc(sizeof *attr, GFP_KERNEL);
738         if (!attr)
739                 return -ENOMEM;
740
741         attr->qp_state = IB_QPS_INIT;
742         attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
743             IB_ACCESS_REMOTE_WRITE;
744         attr->port_num = ch->sport->port;
745         attr->pkey_index = 0;
746
747         ret = ib_modify_qp(qp, attr,
748                            IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PORT |
749                            IB_QP_PKEY_INDEX);
750
751         kfree(attr);
752         return ret;
753 }
754
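/*
 * Transition the QP associated with RDMA channel 'ch' to the RTR or RTS
 * state using the attributes obtained from the IB CM, and limit the number
 * of outstanding RDMA reads/atomics to four.
 */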
755 static int srpt_ch_qp_rtr_rts(struct srpt_rdma_ch *ch, struct ib_qp *qp,
756                               enum ib_qp_state qp_state)
757 {
758         struct ib_qp_attr *qp_attr;
759         int attr_mask;
760         int ret;
761
762         qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
763         if (!qp_attr)
764                 return -ENOMEM;
765
766         qp_attr->qp_state = qp_state;
767         ret = ib_cm_init_qp_attr(ch->cm_id, qp_attr, &attr_mask);
768         if (ret)
769                 goto out;
770
771         if (qp_state == IB_QPS_RTR)
772                 qp_attr->max_dest_rd_atomic = 4;
773         else
774                 qp_attr->max_rd_atomic = 4;
775
776         ret = ib_modify_qp(qp, qp_attr, attr_mask);
777
778 out:
779         kfree(qp_attr);
780         return ret;
781 }
782
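/*
 * Release the RDMA work unit and data buffer descriptors associated with I/O
 * context *ioctx and repost the ioctx on the SRQ so that it can receive a
 * new information unit.
 */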
783 static void srpt_reset_ioctx(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
784 {
785         int i;
786
787         if (ioctx->n_rdma_ius > 0 && ioctx->rdma_ius) {
788                 struct rdma_iu *riu = ioctx->rdma_ius;
789
790                 for (i = 0; i < ioctx->n_rdma_ius; ++i, ++riu)
791                         kfree(riu->sge);
792                 kfree(ioctx->rdma_ius);
793         }
794
795         if (ioctx->n_rbuf > 1)
796                 kfree(ioctx->rbufs);
797
798         if (srpt_post_recv(ch->sport->sdev, ioctx))
799                 /* we should queue it back to a free_ioctx queue */
800                 printk(KERN_ERR PFX "SRQ post_recv failed - this is serious\n");
801         else
802                 atomic_inc(&ch->req_lim_delta);
803 }
804
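/*
 * Abort processing of SCST command *scmnd: unmap its data buffers and,
 * depending on the command state, either report a data transfer error via
 * scst_rx_data() or complete the command with a failed delivery status.
 */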
805 static void srpt_abort_scst_cmd(struct srpt_device *sdev,
806                                 struct scst_cmd *scmnd,
807                                 bool tell_initiator)
808 {
809         scst_data_direction dir;
810
811         dir = scst_cmd_get_data_direction(scmnd);
812         if (dir != SCST_DATA_NONE) {
813                 dma_unmap_sg(sdev->device->dma_device,
814                              scst_cmd_get_sg(scmnd),
815                              scst_cmd_get_sg_cnt(scmnd),
816                              scst_to_tgt_dma_dir(dir));
817
818                 if (scmnd->state == SCST_CMD_STATE_DATA_WAIT) {
819                         scst_rx_data(scmnd,
820                                      tell_initiator ? SCST_RX_STATUS_ERROR
821                                      : SCST_RX_STATUS_ERROR_FATAL,
822                                      SCST_CONTEXT_THREAD);
823                         goto out;
824                 } else if (scmnd->state == SCST_CMD_STATE_XMIT_WAIT)
825                         ;
826         }
827
828         scst_set_delivery_status(scmnd, SCST_CMD_DELIVERY_FAILED);
829         scst_tgt_cmd_done(scmnd, scst_estimate_context());
830 out:
831         return;
832 }
833
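/*
 * Handle a work completion that finished with an error status. For a failed
 * send or RDMA completion the associated SCST command is aborted or the I/O
 * context is recycled; a failed receive indicates that the SRQ is in a bad
 * state.
 */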
834 static void srpt_handle_err_comp(struct srpt_rdma_ch *ch, struct ib_wc *wc)
835 {
836         struct srpt_ioctx *ioctx;
837         struct srpt_device *sdev = ch->sport->sdev;
838
839         if (wc->wr_id & SRPT_OP_RECV) {
840                 ioctx = sdev->ioctx_ring[wc->wr_id & ~SRPT_OP_RECV];
841                 printk(KERN_ERR PFX "This is serious - SRQ is in bad state\n");
842         } else {
843                 ioctx = sdev->ioctx_ring[wc->wr_id];
844
845                 if (ioctx->scmnd)
846                         srpt_abort_scst_cmd(sdev, ioctx->scmnd, true);
847                 else
848                         srpt_reset_ioctx(ch, ioctx);
849         }
850 }
851
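/*
 * Handle a send completion: unmap the data buffers of the associated SCST
 * command and tell SCST that response transmission has finished, or recycle
 * the I/O context if no SCST command is associated with it.
 */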
852 static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
853                                   struct srpt_ioctx *ioctx,
854                                   enum scst_exec_context context)
855 {
856         if (ioctx->scmnd) {
857                 scst_data_direction dir =
858                         scst_cmd_get_data_direction(ioctx->scmnd);
859
860                 if (dir != SCST_DATA_NONE)
861                         dma_unmap_sg(ch->sport->sdev->device->dma_device,
862                                      scst_cmd_get_sg(ioctx->scmnd),
863                                      scst_cmd_get_sg_cnt(ioctx->scmnd),
864                                      scst_to_tgt_dma_dir(dir));
865
866                 scst_tgt_cmd_done(ioctx->scmnd, context);
867         } else
868                 srpt_reset_ioctx(ch, ioctx);
869 }
870
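/*
 * Handle an RDMA read or write completion: for a write command, tell SCST
 * that the data transfer from the initiator has finished.
 */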
871 static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
872                                   struct srpt_ioctx *ioctx)
873 {
874         if (!ioctx->scmnd) {
875                 srpt_reset_ioctx(ch, ioctx);
876                 return;
877         }
878
879         if (scst_cmd_get_data_direction(ioctx->scmnd) == SCST_DATA_WRITE)
880                 scst_rx_data(ioctx->scmnd, SCST_RX_STATUS_SUCCESS,
881                         scst_estimate_context());
882 }
883
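/*
 * Build an SRP_RSP response IU in ioctx->buf with the given tag. If the sense
 * key s_key differs from NO_SENSE, a CHECK CONDITION status and fixed-format
 * sense data with key s_key and ASC/ASCQ s_code are included in the response.
 */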
884 static void srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
885                                struct srpt_ioctx *ioctx, u8 s_key, u8 s_code,
886                                u64 tag)
887 {
888         struct srp_rsp *srp_rsp;
889         struct sense_data *sense;
890         int limit_delta;
891
892         srp_rsp = ioctx->buf;
893         memset(srp_rsp, 0, sizeof *srp_rsp);
894
895         limit_delta = atomic_read(&ch->req_lim_delta);
896         atomic_sub(limit_delta, &ch->req_lim_delta);
897
898         srp_rsp->opcode = SRP_RSP;
899         srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
900         srp_rsp->tag = tag;
901
902         if (s_key != NO_SENSE) {
903                 srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
904                 srp_rsp->status = SAM_STAT_CHECK_CONDITION;
905                 srp_rsp->sense_data_len =
906                     cpu_to_be32(sizeof *sense + (sizeof *sense % 4));
907
908                 sense = (struct sense_data *)(srp_rsp + 1);
909                 sense->err_code = 0x70;
910                 sense->key = s_key;
911                 sense->asc_ascq = s_code;
912         }
913 }
914
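/*
 * Build an SRP_RSP task management response IU in ioctx->buf with the given
 * tag. If rsp_code differs from SRP_TSK_MGMT_SUCCESS, the response code is
 * included as four bytes of response data.
 */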
915 static void srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
916                                    struct srpt_ioctx *ioctx, u8 rsp_code,
917                                    u64 tag)
918 {
919         struct srp_rsp *srp_rsp;
920         int limit_delta;
921
922         dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
923                                 MAX_MESSAGE_SIZE, DMA_TO_DEVICE);
924
925         srp_rsp = ioctx->buf;
926         memset(srp_rsp, 0, sizeof *srp_rsp);
927
928         limit_delta = atomic_read(&ch->req_lim_delta);
929         atomic_sub(limit_delta, &ch->req_lim_delta);
930
931         srp_rsp->opcode = SRP_RSP;
932         srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
933         srp_rsp->tag = tag;
934
935         if (rsp_code != SRP_TSK_MGMT_SUCCESS) {
936                 srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID;
937                 srp_rsp->resp_data_len = cpu_to_be32(4);
938                 srp_rsp->data[3] = rsp_code;
939         }
940 }
941
942 /*
943  * Process SRP_CMD.
944  */
945 static int srpt_handle_cmd(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
946 {
947         struct scst_cmd *scmnd = NULL;
948         struct srp_cmd *srp_cmd = NULL;
949         scst_data_direction dir = SCST_DATA_NONE;
950         int indirect_desc = 0;
951         int ret;
952         unsigned long flags;
953
954         srp_cmd = ioctx->buf;
955
956         if (srp_cmd->buf_fmt) {
957                 ret = srpt_get_desc_tbl(ioctx, srp_cmd, &indirect_desc);
958                 if (ret) {
959                         srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
960                                            NO_ADD_SENSE, srp_cmd->tag);
961                         ((struct srp_rsp *)ioctx->buf)->status =
962                                         SAM_STAT_TASK_SET_FULL;
963                         goto send_rsp;
964                 }
965
966                 if (indirect_desc) {
967                         srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
968                                            NO_ADD_SENSE, srp_cmd->tag);
969                         ((struct srp_rsp *)ioctx->buf)->status =
970                                         SAM_STAT_TASK_SET_FULL;
971                         goto send_rsp;
972                 }
973
974                 if (srp_cmd->buf_fmt & 0xf)
975                         dir = SCST_DATA_READ;
976                 else if (srp_cmd->buf_fmt >> 4)
977                         dir = SCST_DATA_WRITE;
978                 else
979                         dir = SCST_DATA_NONE;
980         } else
981                 dir = SCST_DATA_NONE;
982
983         scmnd = scst_rx_cmd(ch->scst_sess, (u8 *) &srp_cmd->lun,
984                             sizeof srp_cmd->lun, srp_cmd->cdb, 16,
985                             thread ? SCST_NON_ATOMIC : SCST_ATOMIC);
986         if (!scmnd) {
987                 srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
988                                    NO_ADD_SENSE, srp_cmd->tag);
989                 ((struct srp_rsp *)ioctx->buf)->status =
990                         SAM_STAT_TASK_SET_FULL;
991                 goto send_rsp;
992         }
993
994         ioctx->scmnd = scmnd;
995
996         switch (srp_cmd->task_attr) {
997         case SRP_CMD_HEAD_OF_Q:
998                 scmnd->queue_type = SCST_CMD_QUEUE_HEAD_OF_QUEUE;
999                 break;
1000         case SRP_CMD_ORDERED_Q:
1001                 scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
1002                 break;
1003         case SRP_CMD_SIMPLE_Q:
1004                 scmnd->queue_type = SCST_CMD_QUEUE_SIMPLE;
1005                 break;
1006         case SRP_CMD_ACA:
1007                 scmnd->queue_type = SCST_CMD_QUEUE_ACA;
1008                 break;
1009         default:
1010                 scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
1011                 break;
1012         }
1013
1014         scst_cmd_set_tag(scmnd, srp_cmd->tag);
1015         scst_cmd_set_tgt_priv(scmnd, ioctx);
1016         scst_cmd_set_expected(scmnd, dir, ioctx->data_len);
1017
1018         spin_lock_irqsave(&ch->spinlock, flags);
1019         list_add_tail(&ioctx->scmnd_list, &ch->active_scmnd_list);
1020         ch->active_scmnd_cnt++;
1021         spin_unlock_irqrestore(&ch->spinlock, flags);
1022
1023         scst_cmd_init_done(scmnd, scst_estimate_context());
1024
1025         return 0;
1026
1027 send_rsp:
1028         return -1;
1029 }
1030
1031 /*
1032  * Process SRP_TSK_MGMT. See also table 19 in the T10 SRP r16a document.
1033  */
1034 static int srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
1035                                 struct srpt_ioctx *ioctx)
1036 {
1037         struct srp_tsk_mgmt *srp_tsk = NULL;
1038         struct srpt_mgmt_ioctx *mgmt_ioctx;
1039         int ret;
1040
1041         srp_tsk = ioctx->buf;
1042
1043         TRACE_DBG("recv_tsk_mgmt= %d for task_tag= %lld"
1044                   " using tag= %lld cm_id= %p sess= %p",
1045                   srp_tsk->tsk_mgmt_func,
1046                   (unsigned long long) srp_tsk->task_tag,
1047                   (unsigned long long) srp_tsk->tag,
1048                   ch->cm_id, ch->scst_sess);
1049
1050         mgmt_ioctx = kmalloc(sizeof *mgmt_ioctx, GFP_ATOMIC);
1051         if (!mgmt_ioctx) {
1052                 srpt_build_tskmgmt_rsp(ch, ioctx, SRP_TSK_MGMT_FAILED,
1053                                        srp_tsk->tag);
1054                 goto send_rsp;
1055         }
1056
1057         mgmt_ioctx->ioctx = ioctx;
1058         mgmt_ioctx->ch = ch;
1059         mgmt_ioctx->tag = srp_tsk->tag;
1060
1061         switch (srp_tsk->tsk_mgmt_func) {
1062         case SRP_TSK_ABORT_TASK:
1063                 ret = scst_rx_mgmt_fn_tag(ch->scst_sess,
1064                                           SCST_ABORT_TASK,
1065                                           srp_tsk->task_tag,
1066                                           thread ?
1067                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1068                                           mgmt_ioctx);
1069                 break;
1070         case SRP_TSK_ABORT_TASK_SET:
1071                 ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
1072                                           SCST_ABORT_TASK_SET,
1073                                           (u8 *) &srp_tsk->lun,
1074                                           sizeof srp_tsk->lun,
1075                                           thread ?
1076                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1077                                           mgmt_ioctx);
1078                 break;
1079         case SRP_TSK_CLEAR_TASK_SET:
1080                 ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
1081                                           SCST_CLEAR_TASK_SET,
1082                                           (u8 *) &srp_tsk->lun,
1083                                           sizeof srp_tsk->lun,
1084                                           thread ?
1085                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1086                                           mgmt_ioctx);
1087                 break;
1088         case SRP_TSK_LUN_RESET:
1089                 ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
1090                                           SCST_LUN_RESET,
1091                                           (u8 *) &srp_tsk->lun,
1092                                           sizeof srp_tsk->lun,
1093                                           thread ?
1094                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1095                                           mgmt_ioctx);
1096                 break;
1097         case SRP_TSK_CLEAR_ACA:
1098                 ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
1099                                           SCST_CLEAR_ACA,
1100                                           (u8 *) &srp_tsk->lun,
1101                                           sizeof srp_tsk->lun,
1102                                           thread ?
1103                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1104                                           mgmt_ioctx);
1105                 break;
1106         default:
1107                 srpt_build_tskmgmt_rsp(ch, ioctx,
1108                                        SRP_TSK_MGMT_FUNC_NOT_SUPP,
1109                                        srp_tsk->tag);
1110                 goto send_rsp;
1111         }
1112         return 0;
1113
1114 send_rsp:
1115         return -1;
1116 }
1117
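/*
 * Process a newly received information unit: dispatch SRP_CMD and
 * SRP_TSK_MGMT requests, queue the I/O context if the channel is still
 * connecting, and send an error response for unsupported request types.
 */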
1118 static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
1119                                struct srpt_ioctx *ioctx)
1120 {
1121         u8 op;
1122         unsigned long flags;
1123
1124         if (ch->state != RDMA_CHANNEL_LIVE) {
1125                 if (ch->state == RDMA_CHANNEL_CONNECTING) {
1126                         spin_lock_irqsave(&ch->spinlock, flags);
1127                         list_add_tail(&ioctx->wait_list, &ch->cmd_wait_list);
1128                         spin_unlock_irqrestore(&ch->spinlock, flags);
1129                 } else
1130                         srpt_reset_ioctx(ch, ioctx);
1131
1132                 return;
1133         }
1134
1135         dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
1136                                 MAX_MESSAGE_SIZE, DMA_FROM_DEVICE);
1137
1138         ioctx->data_len = 0;
1139         ioctx->n_rbuf = 0;
1140         ioctx->rbufs = NULL;
1141         ioctx->n_rdma = 0;
1142         ioctx->n_rdma_ius = 0;
1143         ioctx->rdma_ius = NULL;
1144         ioctx->scmnd = NULL;
1145
1146         op = *(u8 *) ioctx->buf;
1147         switch (op) {
1148         case SRP_CMD:
1149                 if (srpt_handle_cmd(ch, ioctx) < 0)
1150                         goto send_rsp;
1151                 break;
1152
1153         case SRP_TSK_MGMT:
1154                 if (srpt_handle_tsk_mgmt(ch, ioctx) < 0)
1155                         goto send_rsp;
1156                 break;
1157
1158         case SRP_I_LOGOUT:
1159         case SRP_AER_REQ:
1160         default:
1161                 srpt_build_cmd_rsp(ch, ioctx, ILLEGAL_REQUEST, INVALID_CDB,
1162                                    ((struct srp_cmd *)ioctx->buf)->tag);
1163
1164                 goto send_rsp;
1165         }
1166
1167         dma_sync_single_for_device(ch->sport->sdev->device->dma_device,
1168                                    ioctx->dma, MAX_MESSAGE_SIZE,
1169                                    DMA_FROM_DEVICE);
1170
1171         return;
1172
1173 send_rsp:
1174         if (ch->state != RDMA_CHANNEL_LIVE ||
1175             srpt_post_send(ch, ioctx,
1176                            sizeof(struct srp_rsp) +
1177                            be32_to_cpu(((struct srp_rsp *)ioctx->buf)->
1178                                        sense_data_len)))
1179                 srpt_reset_ioctx(ch, ioctx);
1180 }
1181
1182 /*
1183  * Returns true if the ioctx list is non-empty or if the ib_srpt kernel thread
1184  * should stop.
1185  * @pre thread != 0
1186  */
1187 static inline int srpt_test_ioctx_list(void)
1188 {
1189         int res = (!list_empty(&srpt_thread.thread_ioctx_list) ||
1190                    unlikely(kthread_should_stop()));
1191         return res;
1192 }
1193
1194 /*
1195  * Add 'ioctx' to the tail of the ioctx list and wake up the kernel thread.
1196  *
1197  * @pre thread != 0
1198  */
1199 static inline void srpt_schedule_thread(struct srpt_ioctx *ioctx)
1200 {
1201         unsigned long flags;
1202
1203         spin_lock_irqsave(&srpt_thread.thread_lock, flags);
1204         list_add_tail(&ioctx->comp_list, &srpt_thread.thread_ioctx_list);
1205         spin_unlock_irqrestore(&srpt_thread.thread_lock, flags);
1206         wake_up(&ioctx_list_waitQ);
1207 }
1208
1209 /*
1210  * InfiniBand completion queue (CQ) callback: poll the CQ of RDMA channel
1211  * 'ctx' and process the available work completions.
1212  */
1213 static void srpt_completion(struct ib_cq *cq, void *ctx)
1214 {
1215         struct srpt_rdma_ch *ch = ctx;
1216         struct srpt_device *sdev = ch->sport->sdev;
1217         struct ib_wc wc;
1218         struct srpt_ioctx *ioctx;
1219
1220         ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
1221         while (ib_poll_cq(ch->cq, 1, &wc) > 0) {
1222                 if (wc.status) {
1223                         printk(KERN_ERR PFX "failed %s status= %d\n",
1224                                wc.wr_id & SRPT_OP_RECV ? "receive" : "send",
1225                                wc.status);
1226                         srpt_handle_err_comp(ch, &wc);
1227                         break;
1228                 }
1229
1230                 if (wc.wr_id & SRPT_OP_RECV) {
1231                         ioctx = sdev->ioctx_ring[wc.wr_id & ~SRPT_OP_RECV];
1232                         if (thread) {
1233                                 ioctx->ch = ch;
1234                                 ioctx->op = IB_WC_RECV;
1235                                 srpt_schedule_thread(ioctx);
1236                         } else
1237                                 srpt_handle_new_iu(ch, ioctx);
1238                         continue;
1239                 } else
1240                         ioctx = sdev->ioctx_ring[wc.wr_id];
1241
1242                 if (thread) {
1243                         ioctx->ch = ch;
1244                         ioctx->op = wc.opcode;
1245                         srpt_schedule_thread(ioctx);
1246                 } else {
1247                         switch (wc.opcode) {
1248                         case IB_WC_SEND:
1249                                 srpt_handle_send_comp(ch, ioctx,
1250                                         scst_estimate_context());
1251                                 break;
1252                         case IB_WC_RDMA_WRITE:
1253                         case IB_WC_RDMA_READ:
1254                                 srpt_handle_rdma_comp(ch, ioctx);
1255                                 break;
1256                         default:
1257                                 break;
1258                         }
1259                 }
1260         }
1261 }
1262
1263 /*
1264  * Create a completion queue and a queue pair (QP) for the specified RDMA channel.
1265  */
1266 static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
1267 {
1268         struct ib_qp_init_attr *qp_init;
1269         struct srpt_device *sdev = ch->sport->sdev;
1270         int cqe;
1271         int ret;
1272
1273         qp_init = kzalloc(sizeof *qp_init, GFP_KERNEL);
1274         if (!qp_init)
1275                 return -ENOMEM;
1276
1277         /* Create a completion queue (CQ). */
1278
1279         cqe = SRPT_RQ_SIZE + SRPT_SQ_SIZE - 1;
1280 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(RHEL_RELEASE_CODE)
1281         ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe);
1282 #else
1283         ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe, 0);
1284 #endif
1285         if (IS_ERR(ch->cq)) {
1286                 ret = PTR_ERR(ch->cq);
1287                 printk(KERN_ERR PFX "failed to create_cq cqe= %d ret= %d\n",
1288                         cqe, ret);
1289                 goto out;
1290         }
1291
1292         /* Request completion notification. */
1293
1294         ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
1295
1296         /* Create a queue pair (QP). */
1297
1298         qp_init->qp_context = (void *)ch;
1299         qp_init->event_handler = srpt_qp_event;
1300         qp_init->send_cq = ch->cq;
1301         qp_init->recv_cq = ch->cq;
1302         qp_init->srq = sdev->srq;
1303         qp_init->sq_sig_type = IB_SIGNAL_REQ_WR;
1304         qp_init->qp_type = IB_QPT_RC;
1305         qp_init->cap.max_send_wr = SRPT_SQ_SIZE;
1306         qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE;
1307
1308         ch->qp = ib_create_qp(sdev->pd, qp_init);
1309         if (IS_ERR(ch->qp)) {
1310                 ret = PTR_ERR(ch->qp);
1311                 ib_destroy_cq(ch->cq);
1312                 printk(KERN_ERR PFX "failed to create_qp ret= %d\n", ret);
1313                 goto out;
1314         }
1315
1316         TRACE_DBG("%s: max_cqe= %d max_sge= %d cm_id= %p",
1317                __func__, ch->cq->cqe, qp_init->cap.max_send_sge,
1318                ch->cm_id);
1319
1320         /* Modify the attributes and the state of queue pair ch->qp. */
1321
1322         ret = srpt_init_ch_qp(ch, ch->qp);
1323         if (ret) {
1324                 ib_destroy_qp(ch->qp);
1325                 ib_destroy_cq(ch->cq);
1326                 goto out;
1327         }
1328
1329         atomic_set(&ch->req_lim_delta, SRPT_RQ_SIZE);
1330 out:
1331         kfree(qp_init);
1332         return ret;
1333 }
1334
1335 /**
1336  * Look up the RDMA channel that corresponds to the specified cm_id.
1337  *
1338  * Return NULL if no matching RDMA channel has been found.
1339  */
1340 static struct srpt_rdma_ch *srpt_find_channel(struct ib_cm_id *cm_id)
1341 {
1342         struct srpt_device *sdev = cm_id->context;
1343         struct srpt_rdma_ch *ch;
1344
1345         spin_lock_irq(&sdev->spinlock);
1346         list_for_each_entry(ch, &sdev->rch_list, list) {
1347                 if (ch->cm_id == cm_id) {
1348                         spin_unlock_irq(&sdev->spinlock);
1349                         return ch;
1350                 }
1351         }
1352
1353         spin_unlock_irq(&sdev->spinlock);
1354
1355         return NULL;
1356 }
1357
1358 /** Release all resources associated with the specified RDMA channel. */
1359 static void srpt_release_channel(struct srpt_rdma_ch *ch, int destroy_cmid)
1360 {
1361         TRACE_ENTRY();
1362
1363         spin_lock_irq(&ch->sport->sdev->spinlock);
1364         list_del(&ch->list);
1365         spin_unlock_irq(&ch->sport->sdev->spinlock);
1366
1367         if (ch->cm_id && destroy_cmid) {
1368                 TRACE_DBG("%s: destroy cm_id= %p", __func__, ch->cm_id);
1369                 ib_destroy_cm_id(ch->cm_id);
1370                 ch->cm_id = NULL;
1371         }
1372
1373         ib_destroy_qp(ch->qp);
1374         ib_destroy_cq(ch->cq);
1375
1376         if (ch->scst_sess) {
1377                 struct srpt_ioctx *ioctx, *ioctx_tmp;
1378
1379                 TRACE_DBG("%s: release sess= %p sess_name= %s active_cmd= %d",
1380                           __func__, ch->scst_sess, ch->sess_name,
1381                           ch->active_scmnd_cnt);
1382
1383                 spin_lock_irq(&ch->spinlock);
1384                 list_for_each_entry_safe(ioctx, ioctx_tmp,
1385                                          &ch->active_scmnd_list, scmnd_list) {
1386                         spin_unlock_irq(&ch->spinlock);
1387
1388                         if (ioctx->scmnd)
1389                                 srpt_abort_scst_cmd(ch->sport->sdev,
1390                                                     ioctx->scmnd, true);
1391
1392                         spin_lock_irq(&ch->spinlock);
1393                 }
1394                 WARN_ON(!list_empty(&ch->active_scmnd_list));
1395                 WARN_ON(ch->active_scmnd_cnt != 0);
1396                 spin_unlock_irq(&ch->spinlock);
1397
1398                 scst_unregister_session(ch->scst_sess, 0, NULL);
1399                 ch->scst_sess = NULL;
1400         }
1401
1402         kfree(ch);
1403
1404         TRACE_EXIT();
1405 }
1406
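/*
 * Change the channel state to RDMA_CHANNEL_DISCONNECTING and send either a
 * CM DREQ (when 'dreq' is non-zero) or a CM DREP message to the initiator.
 */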
1407 static int srpt_disconnect_channel(struct srpt_rdma_ch *ch, int dreq)
1408 {
1409         spin_lock_irq(&ch->spinlock);
1410         ch->state = RDMA_CHANNEL_DISCONNECTING;
1411         spin_unlock_irq(&ch->spinlock);
1412
1413         if (dreq)
1414                 ib_send_cm_dreq(ch->cm_id, NULL, 0);
1415         else
1416                 ib_send_cm_drep(ch->cm_id, NULL, 0);
1417
1418         return 0;
1419 }
1420
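/*
 * Process an IB CM REQ, i.e. an SRP login request: validate the request,
 * terminate or reject existing channels as requested by the multi-channel
 * action field, then allocate and set up a new RDMA channel and register an
 * SCST session for it.
 */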
1421 static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
1422                             struct ib_cm_req_event_param *param,
1423                             void *private_data)
1424 {
1425         struct srpt_device *sdev = cm_id->context;
1426         struct srp_login_req *req;
1427         struct srp_login_rsp *rsp;
1428         struct srp_login_rej *rej;
1429         struct ib_cm_rep_param *rep_param;
1430         struct srpt_rdma_ch *ch, *tmp_ch;
1431         u32 it_iu_len;
1432         int ret = 0;
1433
1434         if (!sdev || !private_data)
1435                 return -EINVAL;
1436
1437         rsp = kzalloc(sizeof *rsp, GFP_KERNEL);
1438         rej = kzalloc(sizeof *rej, GFP_KERNEL);
1439         rep_param = kzalloc(sizeof *rep_param, GFP_KERNEL);
1440
1441         if (!rsp || !rej || !rep_param) {
1442                 ret = -ENOMEM;
1443                 goto out;
1444         }
1445
1446         req = (struct srp_login_req *)private_data;
1447
1448         it_iu_len = be32_to_cpu(req->req_it_iu_len);
1449
1450         TRACE_DBG("Host login i_port_id=0x%llx:0x%llx t_port_id=0x%llx:0x%llx"
1451             " it_iu_len=%d",
1452             (unsigned long long)
1453             be64_to_cpu(*(u64 *)&req->initiator_port_id[0]),
1454             (unsigned long long)
1455             be64_to_cpu(*(u64 *)&req->initiator_port_id[8]),
1456             (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[0]),
1457             (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[8]),
1458             it_iu_len);
1459
1460         if (it_iu_len > MAX_MESSAGE_SIZE || it_iu_len < 64) {
1461                 rej->reason =
1462                     cpu_to_be32(SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
1463                 ret = -EINVAL;
1464                 TRACE_DBG("Reject invalid it_iu_len=%d", it_iu_len);
1465                 goto reject;
1466         }
1467
1468         if ((req->req_flags & 0x3) == SRP_MULTICHAN_SINGLE) {
1469                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;
1470
1471                 spin_lock_irq(&sdev->spinlock);
1472
1473                 list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
1474                         if (!memcmp(ch->i_port_id, req->initiator_port_id, 16)
1475                             && !memcmp(ch->t_port_id, req->target_port_id, 16)
1476                             && param->port == ch->sport->port
1477                             && param->listen_id == ch->sport->sdev->cm_id
1478                             && ch->cm_id) {
1479                                 /* found an existing channel */
1480                                 TRACE_DBG("Found existing channel name= %s"
1481                                           " cm_id= %p state= %d",
1482                                           ch->sess_name, ch->cm_id, ch->state);
1483
1484                                 spin_unlock_irq(&sdev->spinlock);
1485
1486                                 rsp->rsp_flags =
1487                                     SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
1488
1489                                 if (ch->state == RDMA_CHANNEL_LIVE)
1490                                         srpt_disconnect_channel(ch, 1);
1491                                 else if (ch->state == RDMA_CHANNEL_CONNECTING) {
1492                                         ib_send_cm_rej(ch->cm_id,
1493                                                        IB_CM_REJ_NO_RESOURCES,
1494                                                        NULL, 0, NULL, 0);
1495                                         srpt_release_channel(ch, 1);
1496                                 }
1497
1498                                 spin_lock_irq(&sdev->spinlock);
1499                         }
1500                 }
1501
1502                 spin_unlock_irq(&sdev->spinlock);
1503
1504         } else
1505                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
1506
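        /*
         * Verify that both halves of the 128-bit target port id in the login
         * request match the IOC GUID advertised by this target.
         */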
1507         if (((u64) (*(u64 *) req->target_port_id) !=
1508              cpu_to_be64(mellanox_ioc_guid)) ||
1509             ((u64) (*(u64 *) (req->target_port_id + 8)) !=
1510              cpu_to_be64(mellanox_ioc_guid))) {
1511                 rej->reason =
1512                     cpu_to_be32(SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
1513                 ret = -ENOMEM;
1514                 TRACE_DBG("%s", "Reject invalid target_port_id");
1515                 goto reject;
1516         }
1517
1518         ch = kzalloc(sizeof *ch, GFP_KERNEL);
1519         if (!ch) {
1520                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1521                 TRACE_DBG("%s", "Reject: failed to allocate rdma_ch");
1522                 ret = -ENOMEM;
1523                 goto reject;
1524         }
1525
1526         spin_lock_init(&ch->spinlock);
1527         memcpy(ch->i_port_id, req->initiator_port_id, 16);
1528         memcpy(ch->t_port_id, req->target_port_id, 16);
1529         ch->sport = &sdev->port[param->port - 1];
1530         ch->cm_id = cm_id;
1531         ch->state = RDMA_CHANNEL_CONNECTING;
1532         INIT_LIST_HEAD(&ch->cmd_wait_list);
1533         INIT_LIST_HEAD(&ch->active_scmnd_list);
1534
1535         ret = srpt_create_ch_ib(ch);
1536         if (ret) {
1537                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1538                 TRACE_DBG("%s", "Reject failed to create rdma_ch");
1539                 goto free_ch;
1540         }
1541
1542         ret = srpt_ch_qp_rtr_rts(ch, ch->qp, IB_QPS_RTR);
1543         if (ret) {
1544                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1545                 TRACE_DBG("Reject failed qp to rtr/rts ret=%d", ret);
1546                 goto destroy_ib;
1547         }
1548
1549         snprintf(ch->sess_name, sizeof(ch->sess_name),
1550                  "0x%016llx%016llx",
1551                  (unsigned long long)be64_to_cpu(*(u64 *)ch->i_port_id),
1552                  (unsigned long long)be64_to_cpu(*(u64 *)(ch->i_port_id + 8)));
1553
1554         TRACE_DBG("registering session %s", ch->sess_name);
1555
1556         BUG_ON(!sdev->scst_tgt);
1557         ch->scst_sess = scst_register_session(sdev->scst_tgt, 0, ch->sess_name,
1558                                   NULL, NULL);
1559         if (!ch->scst_sess) {
1560                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1561                 TRACE_DBG("%s", "Failed to create scst sess");
1562                 goto destroy_ib;
1563         }
1564
1565         spin_lock_irq(&sdev->spinlock);
1566         list_add_tail(&ch->list, &sdev->rch_list);
1567         spin_unlock_irq(&sdev->spinlock);
1568
1569         TRACE_DBG("Establish connection sess=%p name=%s cm_id=%p",
1570                   ch->scst_sess, ch->sess_name, ch->cm_id);
1571
1572         scst_sess_set_tgt_priv(ch->scst_sess, ch);
1573
1574         /* create srp_login_response */
1575         rsp->opcode = SRP_LOGIN_RSP;
1576         rsp->tag = req->tag;
1577         rsp->max_it_iu_len = req->req_it_iu_len;
1578         rsp->max_ti_iu_len = req->req_it_iu_len;
1579         rsp->buf_fmt =
1580             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
1581         rsp->req_lim_delta = cpu_to_be32(SRPT_RQ_SIZE);
1582         atomic_set(&ch->req_lim_delta, 0);
1583
1584         /* create cm reply */
1585         rep_param->qp_num = ch->qp->qp_num;
1586         rep_param->private_data = (void *)rsp;
1587         rep_param->private_data_len = sizeof *rsp;
1588         rep_param->rnr_retry_count = 7;
1589         rep_param->flow_control = 1;
1590         rep_param->failover_accepted = 0;
1591         rep_param->srq = 1;
1592         rep_param->responder_resources = 4;
1593         rep_param->initiator_depth = 4;
1594
1595         ret = ib_send_cm_rep(cm_id, rep_param);
1596         if (ret)
1597                 srpt_release_channel(ch, 0);
1598
1599         goto out;
1600
1601 destroy_ib:
1602         ib_destroy_qp(ch->qp);
1603         ib_destroy_cq(ch->cq);
1604
1605 free_ch:
1606         kfree(ch);
1607
1608 reject:
1609         rej->opcode = SRP_LOGIN_REJ;
1610         rej->tag = req->tag;
1611         rej->buf_fmt =
1612             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
1613
1614         ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
1615                              (void *)rej, sizeof *rej);
1616
1617 out:
1618         kfree(rep_param);
1619         kfree(rsp);
1620         kfree(rej);
1621
1622         return ret;
1623 }
1624
1625 /**
1626  * Look up the channel associated with the specified cm_id and, if found,
1627  * release it. The cm_id itself is not destroyed here; the srpt_cm_handler()
1628  * cases that call this function return a non-zero value so that the IB CM
1629  * destroys the cm_id.
1630  */
1631 static void srpt_find_and_release_channel(struct ib_cm_id *cm_id)
1632 {
1633         struct srpt_rdma_ch *ch;
1634
1635         ch = srpt_find_channel(cm_id);
1636         if (ch)
1637                 srpt_release_channel(ch, 0);
1638 }
1639
1640 static void srpt_cm_rej_recv(struct ib_cm_id *cm_id)
1641 {
1642         TRACE_DBG("%s: cm_id=%p", __func__, cm_id);
1643         srpt_find_and_release_channel(cm_id);
1644 }
1645
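/*
 * Process an IB CM RTU or USER_ESTABLISHED event: move a connecting channel to
 * the RDMA_CHANNEL_LIVE state, transition its QP to RTS and process the
 * information units that were queued on cmd_wait_list while the channel was
 * still being set up.
 */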
1646 static int srpt_cm_rtu_recv(struct ib_cm_id *cm_id)
1647 {
1648         struct srpt_rdma_ch *ch;
1649         int ret;
1650
1651         ch = srpt_find_channel(cm_id);
1652         if (!ch)
1653                 return -EINVAL;
1654
1655         if (ch->state == RDMA_CHANNEL_CONNECTING) {
1656                 struct srpt_ioctx *ioctx, *ioctx_tmp;
1657
1658                 spin_lock_irq(&ch->spinlock);
1659                 ch->state = RDMA_CHANNEL_LIVE;
1660                 spin_unlock_irq(&ch->spinlock);
1661                 ret = srpt_ch_qp_rtr_rts(ch, ch->qp, IB_QPS_RTS);
1662
1663                 list_for_each_entry_safe(ioctx, ioctx_tmp, &ch->cmd_wait_list,
1664                                          wait_list) {
1665                         list_del(&ioctx->wait_list);
1666                         srpt_handle_new_iu(ch, ioctx);
1667                 }
1668         } else if (ch->state == RDMA_CHANNEL_DISCONNECTING)
1669                 ret = -EAGAIN;
1670         else
1671                 ret = 0;
1672
1673         if (ret) {
1674                 TRACE_DBG("cm_id=%p sess_name=%s state=%d",
1675                           cm_id, ch->sess_name, ch->state);
1676                 srpt_disconnect_channel(ch, 1);
1677         }
1678
1679         return ret;
1680 }
1681
1682 static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
1683 {
1684         TRACE_DBG("%s: cm_id=%p", __func__, cm_id);
1685         srpt_find_and_release_channel(cm_id);
1686 }
1687
1688 static void srpt_cm_rep_error(struct ib_cm_id *cm_id)
1689 {
1690         TRACE_DBG("%s: cm_id=%p", __func__, cm_id);
1691         srpt_find_and_release_channel(cm_id);
1692 }
1693
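/*
 * Process an IB CM DREQ event: start disconnecting the channel unless a
 * disconnect is already in progress.
 */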
1694 static int srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
1695 {
1696         struct srpt_rdma_ch *ch;
1697         int ret = 0;
1698
1699         ch = srpt_find_channel(cm_id);
1700
1701         if (!ch)
1702                 return -EINVAL;
1703
1704         TRACE_DBG("%s: cm_id= %p ch->state= %d",
1705                  __func__, cm_id, ch->state);
1706
1707         switch (ch->state) {
1708         case RDMA_CHANNEL_LIVE:
1709         case RDMA_CHANNEL_CONNECTING:
1710                 ret = srpt_disconnect_channel(ch, 0);
1711                 break;
1712         case RDMA_CHANNEL_DISCONNECTING:
1713         default:
1714                 break;
1715         }
1716
1717         return ret;
1718 }
1719
1720 static void srpt_cm_drep_recv(struct ib_cm_id *cm_id)
1721 {
1722         TRACE_DBG("%s: cm_id=%p", __func__, cm_id);
1723         srpt_find_and_release_channel(cm_id);
1724 }
1725
1726 /**
1727  * IB connection manager callback function.
1728  *
1729  * A non-zero return value will make the caller destroy the CM ID.
1730  *
1731  * Note: srpt_add_one passes a struct srpt_device* as the third argument to
1732  * the ib_create_cm_id() call.
1733  */
1734 static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
1735 {
1736         int ret = 0;
1737
1738         switch (event->event) {
1739         case IB_CM_REQ_RECEIVED:
1740                 ret = srpt_cm_req_recv(cm_id, &event->param.req_rcvd,
1741                                        event->private_data);
1742                 break;
1743         case IB_CM_REJ_RECEIVED:
1744                 srpt_cm_rej_recv(cm_id);
1745                 ret = -EINVAL;
1746                 break;
1747         case IB_CM_RTU_RECEIVED:
1748         case IB_CM_USER_ESTABLISHED:
1749                 ret = srpt_cm_rtu_recv(cm_id);
1750                 break;
1751         case IB_CM_DREQ_RECEIVED:
1752                 ret = srpt_cm_dreq_recv(cm_id);
1753                 break;
1754         case IB_CM_DREP_RECEIVED:
1755                 srpt_cm_drep_recv(cm_id);
1756                 ret = -EINVAL;
1757                 break;
1758         case IB_CM_TIMEWAIT_EXIT:
1759                 srpt_cm_timewait_exit(cm_id);
1760                 ret = -EINVAL;
1761                 break;
1762         case IB_CM_REP_ERROR:
1763                 srpt_cm_rep_error(cm_id);
1764                 ret = -EINVAL;
1765                 break;
1766         default:
1767                 break;
1768         }
1769
1770         return ret;
1771 }
1772
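/*
 * DMA-map the scatterlist of 'scmnd' and translate it into the ib_sge lists
 * referenced by ioctx->rdma_ius. This is done in two passes over the SRP
 * remote buffer descriptors: the first pass counts how many ib_sge entries
 * each RDMA work request needs and allocates the per-rdma_iu sge arrays, and
 * the second pass fills in the DMA addresses, lengths and local keys.
 */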
1773 static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1774                                  struct srpt_ioctx *ioctx,
1775                                  struct scst_cmd *scmnd)
1776 {
1777         struct scatterlist *scat;
1778         scst_data_direction dir;
1779         struct rdma_iu *riu;
1780         struct srp_direct_buf *db;
1781         dma_addr_t dma_addr;
1782         struct ib_sge *sge;
1783         u64 raddr;
1784         u32 rsize;
1785         u32 tsize;
1786         u32 dma_len;
1787         int count, nrdma;
1788         int i, j, k;
1789
1790         scat = scst_cmd_get_sg(scmnd);
1791         dir = scst_cmd_get_data_direction(scmnd);
1792         count = dma_map_sg(ch->sport->sdev->device->dma_device, scat,
1793                            scst_cmd_get_sg_cnt(scmnd),
1794                            scst_to_tgt_dma_dir(dir));
1795         if (unlikely(!count))
1796                 return -EBUSY;
1797
1798         if (ioctx->rdma_ius && ioctx->n_rdma_ius)
1799                 nrdma = ioctx->n_rdma_ius;
1800         else {
1801                 nrdma = count / SRPT_DEF_SG_PER_WQE + ioctx->n_rbuf;
1802
1803                 ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu,
1804                                           scst_cmd_atomic(scmnd)
1805                                           ? GFP_ATOMIC : GFP_KERNEL);
1806                 if (!ioctx->rdma_ius) {
1807                         dma_unmap_sg(ch->sport->sdev->device->dma_device,
1808                                      scat, scst_cmd_get_sg_cnt(scmnd),
1809                                      scst_to_tgt_dma_dir(dir));
1810                         return -ENOMEM;
1811                 }
1812
1813                 ioctx->n_rdma_ius = nrdma;
1814         }
1815
1816         db = ioctx->rbufs;
1817         tsize = (dir == SCST_DATA_READ) ?
1818                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
1819         dma_len = sg_dma_len(&scat[0]);
1820         riu = ioctx->rdma_ius;
1821
1822         /*
1823          * For each remote descriptor, calculate the number of ib_sge entries
1824          * needed. If at most SRPT_DEF_SG_PER_WQE ib_sge entries are needed per
1825          * RDMA operation, a single rdma_iu (one RDMA work request) per remote
1826          * descriptor is sufficient; otherwise additional rdma_iu entries are
1827          * allocated to carry the extra ib_sge entries in further RDMA work
1828          * requests.
1829          */
1830         for (i = 0, j = 0;
1831              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
1832                 rsize = be32_to_cpu(db->len);
1833                 raddr = be64_to_cpu(db->va);
1834                 riu->raddr = raddr;
1835                 riu->rkey = be32_to_cpu(db->key);
1836                 riu->sge_cnt = 0;
1837
1838                 /* Calculate how many sge entries this remote buffer requires. */
1839                 while (rsize > 0 && tsize > 0) {
1840
1841                         if (rsize >= dma_len) {
1842                                 tsize -= dma_len;
1843                                 rsize -= dma_len;
1844                                 raddr += dma_len;
1845
1846                                 if (tsize > 0) {
1847                                         ++j;
1848                                         if (j < count)
1849                                                 dma_len = sg_dma_len(&scat[j]);
1850                                 }
1851                         } else {
1852                                 tsize -= rsize;
1853                                 dma_len -= rsize;
1854                                 rsize = 0;
1855                         }
1856
1857                         ++riu->sge_cnt;
1858
1859                         if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) {
1860                                 riu->sge =
1861                                     kmalloc(riu->sge_cnt * sizeof *riu->sge,
1862                                             scst_cmd_atomic(scmnd)
1863                                             ? GFP_ATOMIC : GFP_KERNEL);
1864                                 if (!riu->sge)
1865                                         goto free_mem;
1866
1867                                 ++ioctx->n_rdma;
1868                                 ++riu;
1869                                 riu->sge_cnt = 0;
1870                                 riu->raddr = raddr;
1871                                 riu->rkey = be32_to_cpu(db->key);
1872                         }
1873                 }
1874
1875                 riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge,
1876                                    scst_cmd_atomic(scmnd)
1877                                    ? GFP_ATOMIC : GFP_KERNEL);
1878
1879                 if (!riu->sge)
1880                         goto free_mem;
1881
1882                 ++ioctx->n_rdma;
1883         }
1884
1885         db = ioctx->rbufs;
1886         scat = scst_cmd_get_sg(scmnd);
1887         tsize = (dir == SCST_DATA_READ) ?
1888                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
1889         riu = ioctx->rdma_ius;
1890         dma_len = sg_dma_len(&scat[0]);
1891         dma_addr = sg_dma_address(&scat[0]);
1892
1893         /* The second loop maps the DMA-mapped sg addresses onto the rdma_iu->sge entries. */
1894         for (i = 0, j = 0;
1895              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
1896                 rsize = be32_to_cpu(db->len);
1897                 sge = riu->sge;
1898                 k = 0;
1899
1900                 while (rsize > 0 && tsize > 0) {
1901                         sge->addr = dma_addr;
1902                         sge->lkey = ch->sport->sdev->mr->lkey;
1903
1904                         if (rsize >= dma_len) {
1905                                 sge->length =
1906                                         (tsize < dma_len) ? tsize : dma_len;
1907                                 tsize -= dma_len;
1908                                 rsize -= dma_len;
1909
1910                                 if (tsize > 0) {
1911                                         ++j;
1912                                         if (j < count) {
1913                                                 dma_len = sg_dma_len(&scat[j]);
1914                                                 dma_addr =
1915                                                     sg_dma_address(&scat[j]);
1916                                         }
1917                                 }
1918                         } else {
1919                                 sge->length = (tsize < rsize) ? tsize : rsize;
1920                                 tsize -= rsize;
1921                                 dma_len -= rsize;
1922                                 dma_addr += rsize;
1923                                 rsize = 0;
1924                         }
1925
1926                         ++k;
1927                         if (k == riu->sge_cnt && rsize > 0) {
1928                                 ++riu;
1929                                 sge = riu->sge;
1930                                 k = 0;
1931                         } else if (rsize > 0)
1932                                 ++sge;
1933                 }
1934         }
1935
1936         return 0;
1937
1938 free_mem:
1939         while (ioctx->n_rdma)
1940                 kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge);
1941
1942         kfree(ioctx->rdma_ius);
1943
1944         dma_unmap_sg(ch->sport->sdev->device->dma_device,
1945                      scat, scst_cmd_get_sg_cnt(scmnd),
1946                      scst_to_tgt_dma_dir(dir));
1947
1948         return -ENOMEM;
1949 }
1950
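/*
 * Post one RDMA work request per rdma_iu on the channel's QP: RDMA writes for
 * read data (target to initiator) and RDMA reads for write data (initiator to
 * target).
 */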
1951 static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
1952                               scst_data_direction dir)
1953 {
1954         struct ib_send_wr wr;
1955         struct ib_send_wr *bad_wr;
1956         struct rdma_iu *riu;
1957         int i;
1958         int ret = 0;
1959
1960         riu = ioctx->rdma_ius;
1961         memset(&wr, 0, sizeof wr);
1962
1963         for (i = 0; i < ioctx->n_rdma; ++i, ++riu) {
1964                 wr.opcode = (dir == SCST_DATA_READ) ?
1965                     IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
1966                 wr.next = NULL;
1967                 wr.wr_id = ioctx->index;
1968                 wr.wr.rdma.remote_addr = riu->raddr;
1969                 wr.wr.rdma.rkey = riu->rkey;
1970                 wr.num_sge = riu->sge_cnt;
1971                 wr.sg_list = riu->sge;
1972
1973                 /* Signal a completion only for the last WR of a data-out (RDMA read) transfer. */
1974                 if (i == (ioctx->n_rdma - 1) && dir == SCST_DATA_WRITE)
1975                         wr.send_flags = IB_SEND_SIGNALED;
1976
1977                 ret = ib_post_send(ch->qp, &wr, &bad_wr);
1978                 if (ret)
1979                         break;
1980         }
1981
1982         return ret;
1983 }
1984
1985 /*
1986  * Start data reception. Must not block.
1987  */
1988 static int srpt_xfer_data(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
1989                           struct scst_cmd *scmnd)
1990 {
1991         int ret;
1992
1993         ret = srpt_map_sg_to_ib_sge(ch, ioctx, scmnd);
1994         if (ret) {
1995                 printk(KERN_ERR PFX "%s[%d] ret=%d\n", __func__, __LINE__, ret);
1996                 ret = SCST_TGT_RES_QUEUE_FULL;
1997                 goto out;
1998         }
1999
2000         ret = srpt_perform_rdmas(ch, ioctx, scst_cmd_get_data_direction(scmnd));
2001         if (ret) {
2002                 printk(KERN_ERR PFX "%s[%d] ret=%d\n", __func__, __LINE__, ret);
2003                 if (ret == -EAGAIN || ret == -ENOMEM)
2004                         ret = SCST_TGT_RES_QUEUE_FULL;
2005                 else
2006                         ret = SCST_TGT_RES_FATAL_ERROR;
2007                 goto out;
2008         }
2009
2010         ret = SCST_TGT_RES_SUCCESS;
2011
2012 out:
2013         return ret;
2014 }
2015
2016 /*
2017  * Called by the SCST core to inform ib_srpt that data reception should start.
2018  * Must not block.
2019  */
2020 static int srpt_rdy_to_xfer(struct scst_cmd *scmnd)
2021 {
2022         struct srpt_rdma_ch *ch;
2023         struct srpt_ioctx *ioctx;
2024
2025         ioctx = scst_cmd_get_tgt_priv(scmnd);
2026         BUG_ON(!ioctx);
2027
2028         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2029         BUG_ON(!ch);
2030
2031         if (ch->state == RDMA_CHANNEL_DISCONNECTING)
2032                 return SCST_TGT_RES_FATAL_ERROR;
2033         else if (ch->state == RDMA_CHANNEL_CONNECTING)
2034                 return SCST_TGT_RES_QUEUE_FULL;
2035
2036         return srpt_xfer_data(ch, ioctx, scmnd);
2037 }
2038
2039 /*
2040  * Called by the SCST core. Transmits the response buffer and status held in
2041  * 'scmnd'. Must not block.
2042  */
2043 static int srpt_xmit_response(struct scst_cmd *scmnd)
2044 {
2045         struct srpt_rdma_ch *ch;
2046         struct srpt_ioctx *ioctx;
2047         struct srp_rsp *srp_rsp;
2048         u64 tag;
2049         int ret = SCST_TGT_RES_SUCCESS;
2050         int dir;
2051         int status;
2052
2053         ioctx = scst_cmd_get_tgt_priv(scmnd);
2054         BUG_ON(!ioctx);
2055
2056         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2057         BUG_ON(!ch);
2058
2059         tag = scst_cmd_get_tag(scmnd);
2060
2061         if (ch->state != RDMA_CHANNEL_LIVE) {
2062                 printk(KERN_ERR PFX
2063                        "%s: tag= %lld channel in bad state %d\n",
2064                        __func__, (unsigned long long)tag, ch->state);
2065
2066                 if (ch->state == RDMA_CHANNEL_DISCONNECTING)
2067                         ret = SCST_TGT_RES_FATAL_ERROR;
2068                 else if (ch->state == RDMA_CHANNEL_CONNECTING)
2069                         ret = SCST_TGT_RES_QUEUE_FULL;
2070
2071                 if (unlikely(scst_cmd_aborted(scmnd)))
2072                         goto out_aborted;
2073
2074                 goto out;
2075         }
2076
2077         dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
2078                                 MAX_MESSAGE_SIZE, DMA_TO_DEVICE);
2079
2080         srp_rsp = ioctx->buf;
2081
2082         if (unlikely(scst_cmd_aborted(scmnd))) {
2083                 printk(KERN_ERR PFX
2084                        "%s: tag= %lld has already been aborted\n",
2085                        __func__, (unsigned long long)tag);
2086                 goto out_aborted;
2087         }
2088
2089         dir = scst_cmd_get_data_direction(scmnd);
2090         status = scst_cmd_get_status(scmnd) & 0xff;
2091
2092         srpt_build_cmd_rsp(ch, ioctx, NO_SENSE, NO_ADD_SENSE, tag);
2093
2094         if (SCST_SENSE_VALID(scst_cmd_get_sense_buffer(scmnd))) {
2095                 srp_rsp->sense_data_len = scst_cmd_get_sense_buffer_len(scmnd);
2096                 if (srp_rsp->sense_data_len >
2097                     (MAX_MESSAGE_SIZE - sizeof *srp_rsp))
2098                         srp_rsp->sense_data_len =
2099                             MAX_MESSAGE_SIZE - sizeof *srp_rsp;
2100
2101                 memcpy((u8 *) (srp_rsp + 1), scst_cmd_get_sense_buffer(scmnd),
2102                        srp_rsp->sense_data_len);
2103
2104                 srp_rsp->sense_data_len = cpu_to_be32(srp_rsp->sense_data_len);
2105                 srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
2106
2107                 if (!status)
2108                         status = SAM_STAT_CHECK_CONDITION;
2109         }
2110
2111         srp_rsp->status = status;
2112
2113         /* transfer read data if any */
2114         if (dir == SCST_DATA_READ && scst_cmd_get_resp_data_len(scmnd)) {
2115                 ret = srpt_xfer_data(ch, ioctx, scmnd);
2116                 if (ret != SCST_TGT_RES_SUCCESS) {
2117                         printk(KERN_ERR PFX
2118                                "%s: tag= %lld xfer_data failed\n",
2119                                __func__, (unsigned long long)tag);
2120                         goto out;
2121                 }
2122         }
2123
2124         if (srpt_post_send(ch, ioctx,
2125                            sizeof *srp_rsp +
2126                            be32_to_cpu(srp_rsp->sense_data_len))) {
2127                 printk(KERN_ERR PFX "%s: ch->state= %d tag= %lld\n",
2128                        __func__, ch->state,
2129                        (unsigned long long)tag);
2130                 ret = SCST_TGT_RES_FATAL_ERROR;
2131         }
2132
2133 out:
2134         return ret;
2135
2136 out_aborted:
2137         ret = SCST_TGT_RES_SUCCESS;
2138         scst_set_delivery_status(scmnd, SCST_CMD_DELIVERY_ABORTED);
2139         scst_tgt_cmd_done(scmnd, SCST_CONTEXT_SAME);
2140         goto out;
2141 }
2142
2143 /*
2144  * Called by the SCST core to inform ib_srpt that a received task management
2145  * function has been completed. Must not block.
2146  */
2147 static void srpt_tsk_mgmt_done(struct scst_mgmt_cmd *mcmnd)
2148 {
2149         struct srpt_rdma_ch *ch;
2150         struct srpt_mgmt_ioctx *mgmt_ioctx;
2151         struct srpt_ioctx *ioctx;
2152
2153         mgmt_ioctx = scst_mgmt_cmd_get_tgt_priv(mcmnd);
2154         BUG_ON(!mgmt_ioctx);
2155
2156         ch = mgmt_ioctx->ch;
2157         BUG_ON(!ch);
2158
2159         ioctx = mgmt_ioctx->ioctx;
2160         BUG_ON(!ioctx);
2161
2162         printk(KERN_WARNING PFX
2163                "%s: tsk_mgmt_done for tag= %lld status=%d\n",
2164                __func__, (unsigned long long)mgmt_ioctx->tag,
2165                scst_mgmt_cmd_get_status(mcmnd));
2166
2167         srpt_build_tskmgmt_rsp(ch, ioctx,
2168                                (scst_mgmt_cmd_get_status(mcmnd) ==
2169                                 SCST_MGMT_STATUS_SUCCESS) ?
2170                                SRP_TSK_MGMT_SUCCESS : SRP_TSK_MGMT_FAILED,
2171                                mgmt_ioctx->tag);
2172         srpt_post_send(ch, ioctx, sizeof(struct srp_rsp) + 4);
2173
2174         scst_mgmt_cmd_set_tgt_priv(mcmnd, NULL);
2175
2176         kfree(mgmt_ioctx);
2177 }
2178
2179 /*
2180  * Called by the SCST core to inform ib_srpt that the command 'scmnd' is about
2181  * to be freed. May be called in IRQ context.
2182  */
2183 static void srpt_on_free_cmd(struct scst_cmd *scmnd)
2184 {
2185         struct srpt_rdma_ch *ch;
2186         struct srpt_ioctx *ioctx;
2187
2188         ioctx = scst_cmd_get_tgt_priv(scmnd);
2189         BUG_ON(!ioctx);
2190
2191         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2192         BUG_ON(!ch);
2193
2194         spin_lock_irq(&ch->spinlock);
2195         list_del(&ioctx->scmnd_list);
2196         ch->active_scmnd_cnt--;
2197         spin_unlock_irq(&ch->spinlock);
2198
2199         srpt_reset_ioctx(ch, ioctx);
2200         scst_cmd_set_tgt_priv(scmnd, NULL);
2201 }
2202
2203 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2204 /* A vanilla 2.6.19 or older kernel without backported OFED kernel headers. */
2205 static void srpt_refresh_port_work(void *ctx)
2206 #else
2207 static void srpt_refresh_port_work(struct work_struct *work)
2208 #endif
2209 {
2210 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2211         struct srpt_port *sport = (struct srpt_port *)ctx;
2212 #else
2213         struct srpt_port *sport = container_of(work, struct srpt_port, work);
2214 #endif
2215
2216         srpt_refresh_port(sport);
2217 }
2218
2219 /*
2220  * Called by the SCST core to detect target adapters. Returns the number of
2221  * detected target adapters.
2222  */
2223 static int srpt_detect(struct scst_tgt_template *tp)
2224 {
2225         struct srpt_device *sdev;
2226         int count = 0;
2227
2228         TRACE_ENTRY();
2229
2230         list_for_each_entry(sdev, &srpt_devices, list)
2231                 ++count;
2232
2233         TRACE_EXIT();
2234
2235         return count;
2236 }
2237
2238 /*
2239  * Callback function called by the SCST core from scst_unregister() to free up
2240  * the resources associated with device scst_tgt.
2241  */
2242 static int srpt_release(struct scst_tgt *scst_tgt)
2243 {
2244         struct srpt_device *sdev = scst_tgt_get_tgt_priv(scst_tgt);
2245         struct srpt_rdma_ch *ch, *tmp_ch;
2246
2247         TRACE_ENTRY();
2248
2249         BUG_ON(!scst_tgt);
2250 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
2251         WARN_ON(!sdev);
2252         if (!sdev)
2253                 return -ENODEV;
2254 #else
2255         if (WARN_ON(!sdev))
2256                 return -ENODEV;
2257 #endif
2258
2259         srpt_unregister_procfs_entry(scst_tgt->tgtt);
2260
2261         list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list)
2262                 srpt_release_channel(ch, 1);
2263
2264         srpt_unregister_mad_agent(sdev);
2265
2266         scst_tgt_set_tgt_priv(scst_tgt, NULL);
2267
2268         TRACE_EXIT();
2269
2270         return 0;
2271 }
2272
2273 /*
2274  * Entry point for ib_srpt's kernel thread. This kernel thread is only created
2275  * when the module parameter 'thread' is not zero (the default is zero).
2276  * This thread processes the ioctx list srpt_thread.thread_ioctx_list.
2277  *
2278  * @pre thread != 0
2279  */
2280 static int srpt_ioctx_thread(void *arg)
2281 {
2282         struct srpt_ioctx *ioctx;
2283
2284         /* Hibernation / freezing of the SRPT kernel thread is not supported. */
2285         current->flags |= PF_NOFREEZE;
2286
2287         spin_lock_irq(&srpt_thread.thread_lock);
2288         while (!kthread_should_stop()) {
2289                 wait_queue_t wait;
2290                 init_waitqueue_entry(&wait, current);
2291
2292                 if (!srpt_test_ioctx_list()) {
2293                         add_wait_queue_exclusive(&ioctx_list_waitQ, &wait);
2294
2295                         for (;;) {
2296                                 set_current_state(TASK_INTERRUPTIBLE);
2297                                 if (srpt_test_ioctx_list())
2298                                         break;
2299                                 spin_unlock_irq(&srpt_thread.thread_lock);
2300                                 schedule();
2301                                 spin_lock_irq(&srpt_thread.thread_lock);
2302                         }
2303                         set_current_state(TASK_RUNNING);
2304                         remove_wait_queue(&ioctx_list_waitQ, &wait);
2305                 }
2306
2307                 while (!list_empty(&srpt_thread.thread_ioctx_list)) {
2308                         ioctx = list_entry(srpt_thread.thread_ioctx_list.next,
2309                                            struct srpt_ioctx, comp_list);
2310
2311                         list_del(&ioctx->comp_list);
2312
2313                         spin_unlock_irq(&srpt_thread.thread_lock);
2314                         switch (ioctx->op) {
2315                         case IB_WC_SEND:
2316                                 srpt_handle_send_comp(ioctx->ch, ioctx,
2317                                         SCST_CONTEXT_DIRECT);
2318                                 break;
2319                         case IB_WC_RDMA_WRITE:
2320                         case IB_WC_RDMA_READ:
2321                                 srpt_handle_rdma_comp(ioctx->ch, ioctx);
2322                                 break;
2323                         case IB_WC_RECV:
2324                                 srpt_handle_new_iu(ioctx->ch, ioctx);
2325                                 break;
2326                         default:
2327                                 break;
2328                         }
2329                         spin_lock_irq(&srpt_thread.thread_lock);
2330                 }
2331         }
2332         spin_unlock_irq(&srpt_thread.thread_lock);
2333
2334         return 0;
2335 }
2336
2337 /* SCST target template for the SRP target implementation. */
2338 static struct scst_tgt_template srpt_template = {
2339         .name = DRV_NAME,
2340         .sg_tablesize = SRPT_DEF_SG_TABLESIZE,
2341         .xmit_response_atomic = 1,
2342         .rdy_to_xfer_atomic = 1,
2343         .no_proc_entry = 0,
2344         .detect = srpt_detect,
2345         .release = srpt_release,
2346         .xmit_response = srpt_xmit_response,
2347         .rdy_to_xfer = srpt_rdy_to_xfer,
2348         .on_free_cmd = srpt_on_free_cmd,
2349         .task_mgmt_fn_done = srpt_tsk_mgmt_done
2350 };
2351
2352 /*
2353  * The callback function srpt_release_class_dev() is called whenever a
2354  * device is removed from the /sys/class/infiniband_srpt device class.
2355  */
2356 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2357 static void srpt_release_class_dev(struct class_device *class_dev)
2358 #else
2359 static void srpt_release_class_dev(struct device *dev)
2360 #endif
2361 {
2362 }
2363
2364 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
2365 static int srpt_trace_level_show(struct seq_file *seq, void *v)
2366 {
2367         return scst_proc_log_entry_read(seq, trace_flag, NULL);
2368 }
2369
2370 static ssize_t srpt_proc_trace_level_write(struct file *file,
2371         const char __user *buf, size_t length, loff_t *off)
2372 {
2373         return scst_proc_log_entry_write(file, buf, length, &trace_flag,
2374                 DEFAULT_SRPT_TRACE_FLAGS, NULL);
2375 }
2376
2377 static struct scst_proc_data srpt_log_proc_data = {
2378         SCST_DEF_RW_SEQ_OP(srpt_proc_trace_level_write)
2379         .show = srpt_trace_level_show,
2380 };
2381 #endif
2382
2383 static struct class_attribute srpt_class_attrs[] = {
2384         __ATTR_NULL,
2385 };
2386
2387 static struct class srpt_class = {
2388         .name = "infiniband_srpt",
2389 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2390         .release = srpt_release_class_dev,
2391 #else
2392         .dev_release = srpt_release_class_dev,
2393 #endif
2394         .class_attrs = srpt_class_attrs,
2395 };
2396
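/*
 * Emit one line per HCA port with the SRP login parameters (tid_ext, ioc_guid,
 * pkey, dgid and service_id) an initiator needs to connect to this target.
 * Exported through the "login_info" sysfs attribute.
 */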
2397 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2398 static ssize_t show_login_info(struct class_device *class_dev, char *buf)
2399 #else
2400 static ssize_t show_login_info(struct device *dev,
2401                                struct device_attribute *attr, char *buf)
2402 #endif
2403 {
2404         struct srpt_device *sdev =
2405 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2406                 container_of(class_dev, struct srpt_device, class_dev);
2407 #else
2408                 container_of(dev, struct srpt_device, dev);
2409 #endif
2410         struct srpt_port *sport;
2411         int i;
2412         int len = 0;
2413
2414         for (i = 0; i < sdev->device->phys_port_cnt; i++) {
2415                 sport = &sdev->port[i];
2416
2417                 len += sprintf(buf + len,
2418                                "tid_ext=%016llx,ioc_guid=%016llx,pkey=ffff,"
2419                                "dgid=%04x%04x%04x%04x%04x%04x%04x%04x,"
2420                                "service_id=%016llx\n",
2421                                (unsigned long long) mellanox_ioc_guid,
2422                                (unsigned long long) mellanox_ioc_guid,
2423                                be16_to_cpu(((__be16 *) sport->gid.raw)[0]),
2424                                be16_to_cpu(((__be16 *) sport->gid.raw)[1]),
2425                                be16_to_cpu(((__be16 *) sport->gid.raw)[2]),
2426                                be16_to_cpu(((__be16 *) sport->gid.raw)[3]),
2427                                be16_to_cpu(((__be16 *) sport->gid.raw)[4]),
2428                                be16_to_cpu(((__be16 *) sport->gid.raw)[5]),
2429                                be16_to_cpu(((__be16 *) sport->gid.raw)[6]),
2430                                be16_to_cpu(((__be16 *) sport->gid.raw)[7]),
2431                                (unsigned long long) mellanox_ioc_guid);
2432         }
2433
2434         return len;
2435 }
2436
2437 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2438 static CLASS_DEVICE_ATTR(login_info, S_IRUGO, show_login_info, NULL);
2439 #else
2440 static DEVICE_ATTR(login_info, S_IRUGO, show_login_info, NULL);
2441 #endif
2442
2443 /*
2444  * Callback function called by the InfiniBand core when an InfiniBand device
2445  * is added, and once for each already registered InfiniBand device during the
2446  * ib_register_client() call.
2447  */
2448 static void srpt_add_one(struct ib_device *device)
2449 {
2450         struct srpt_device *sdev;
2451         struct srpt_port *sport;
2452         struct ib_srq_init_attr srq_attr;
2453         int i;
2454
2455         TRACE_ENTRY();
2456
2457         sdev = kzalloc(sizeof *sdev, GFP_KERNEL);
2458         if (!sdev)
2459                 return;
2460
2461         sdev->device = device;
2462
2463 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2464         sdev->class_dev.class = &srpt_class;
2465         sdev->class_dev.dev = device->dma_device;
2466         snprintf(sdev->class_dev.class_id, BUS_ID_SIZE,
2467                  "srpt-%s", device->name);
2468 #else
2469         sdev->dev.class = &srpt_class;
2470         sdev->dev.parent = device->dma_device;
2471 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30)
2472         snprintf(sdev->dev.bus_id, BUS_ID_SIZE, "srpt-%s", device->name);
2473 #else
2474         snprintf(sdev->init_name, sizeof(sdev->init_name),
2475                  "srpt-%s", device->name);
2476         sdev->dev.init_name = sdev->init_name;
2477 #endif
2478 #endif
2479
2480 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2481         if (class_device_register(&sdev->class_dev))
2482                 goto free_dev;
2483         if (class_device_create_file(&sdev->class_dev,
2484                                      &class_device_attr_login_info))
2485                 goto err_dev;
2486 #else
2487         if (device_register(&sdev->dev))
2488                 goto free_dev;
2489         if (device_create_file(&sdev->dev, &dev_attr_login_info))
2490                 goto err_dev;
2491 #endif
2492
2493         if (ib_query_device(device, &sdev->dev_attr))
2494                 goto err_dev;
2495
2496         sdev->pd = ib_alloc_pd(device);
2497         if (IS_ERR(sdev->pd))
2498                 goto err_dev;
2499
2500         sdev->mr = ib_get_dma_mr(sdev->pd, IB_ACCESS_LOCAL_WRITE);
2501         if (IS_ERR(sdev->mr))
2502                 goto err_pd;
2503
2504         srq_attr.event_handler = srpt_srq_event;
2505         srq_attr.srq_context = (void *)sdev;
2506         srq_attr.attr.max_wr = min(SRPT_SRQ_SIZE, sdev->dev_attr.max_srq_wr);
2507         srq_attr.attr.max_sge = 1;
2508         srq_attr.attr.srq_limit = 0;
2509
2510         sdev->srq = ib_create_srq(sdev->pd, &srq_attr);
2511         if (IS_ERR(sdev->srq))
2512                 goto err_mr;
2513
2514         TRACE_DBG("%s: create SRQ #wr= %d max_allow=%d dev= %s",
2515                __func__, srq_attr.attr.max_wr,
2516               sdev->dev_attr.max_srq_wr, device->name);
2517
2518         if (!mellanox_ioc_guid)
2519                 mellanox_ioc_guid = be64_to_cpu(device->node_guid);
2520
2521         sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev);
2522         if (IS_ERR(sdev->cm_id))
2523                 goto err_srq;
2524
2525         /* print out target login information */
2526         TRACE_DBG("Target login info: id_ext=%016llx,"
2527                   "ioc_guid=%016llx,pkey=ffff,service_id=%016llx",
2528                   (unsigned long long) mellanox_ioc_guid,
2529                   (unsigned long long) mellanox_ioc_guid,
2530                   (unsigned long long) mellanox_ioc_guid);
2531
2532         /*
2533          * We do not have a consistent service_id (i.e. the id_ext of the
2534          * target_id) to identify this target. We currently use the GUID of the
2535          * first HCA in the system as the service_id; hence the target_id will
2536          * change if this HCA goes bad and is replaced by a different HCA.
2537          */
2538         if (ib_cm_listen(sdev->cm_id, cpu_to_be64(mellanox_ioc_guid), 0, NULL))
2539                 goto err_cm;
2540
2541         INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device,
2542                               srpt_event_handler);
2543         if (ib_register_event_handler(&sdev->event_handler))
2544                 goto err_cm;
2545
2546         if (srpt_alloc_ioctx_ring(sdev))
2547                 goto err_event;
2548
2549         INIT_LIST_HEAD(&sdev->rch_list);
2550         spin_lock_init(&sdev->spinlock);
2551
2552         for (i = 0; i < SRPT_SRQ_SIZE; ++i)
2553                 srpt_post_recv(sdev, sdev->ioctx_ring[i]);
2554
2555         list_add_tail(&sdev->list, &srpt_devices);
2556
2557         ib_set_client_data(device, &srpt_client, sdev);
2558
2559         sdev->scst_tgt = scst_register(&srpt_template, NULL);
2560         if (!sdev->scst_tgt) {
2561                 printk(KERN_ERR PFX "SCST registration failed for %s.\n",
2562                         sdev->device->name);
2563                 goto err_ring;
2564         }
2565
2566         scst_tgt_set_tgt_priv(sdev->scst_tgt, sdev);
2567
2568         for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
2569                 sport = &sdev->port[i - 1];
2570                 sport->sdev = sdev;
2571                 sport->port = i;
2572 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2573                 /*
2574                  * A vanilla 2.6.19 or older kernel without backported OFED
2575                  * kernel headers.
2576                  */
2577                 INIT_WORK(&sport->work, srpt_refresh_port_work, sport);
2578 #else
2579                 INIT_WORK(&sport->work, srpt_refresh_port_work);
2580 #endif
2581                 if (srpt_refresh_port(sport)) {
2582                         printk(KERN_ERR PFX "MAD registration failed"
2583                                " for %s-%d.\n", sdev->device->name, i);
2584                         goto err_refresh_port;
2585                 }
2586         }
2587
2588         TRACE_EXIT();
2589
2590         return;
2591
2592 err_refresh_port:
2593         scst_unregister(sdev->scst_tgt);
2594 err_ring:
2595         ib_set_client_data(device, &srpt_client, NULL);
2596         list_del(&sdev->list);
2597         srpt_free_ioctx_ring(sdev);
2598 err_event:
2599         ib_unregister_event_handler(&sdev->event_handler);
2600 err_cm:
2601         ib_destroy_cm_id(sdev->cm_id);
2602 err_srq:
2603         ib_destroy_srq(sdev->srq);
2604 err_mr:
2605         ib_dereg_mr(sdev->mr);
2606 err_pd:
2607         ib_dealloc_pd(sdev->pd);
2608 err_dev:
2609 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2610         class_device_unregister(&sdev->class_dev);
2611 #else
2612         device_unregister(&sdev->dev);
2613 #endif
2614 free_dev:
2615         kfree(sdev);
2616
2617         TRACE_EXIT();
2618 }
2619
2620 /*
2621  * Callback function called by the InfiniBand core when an InfiniBand device
2622  * is removed, and once for each still registered InfiniBand device during the
2623  * ib_unregister_client() call.
2624  */
2625 static void srpt_remove_one(struct ib_device *device)
2626 {
2627         int i;
2628         struct srpt_device *sdev;
2629
2630         TRACE_ENTRY();
2631
2632         sdev = ib_get_client_data(device, &srpt_client);
2633 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
2634         WARN_ON(!sdev);
2635         if (!sdev)
2636                 return;
2637 #else
2638         if (WARN_ON(!sdev))
2639                 return;
2640 #endif
2641
2642         /*
2643          * Cancel the work if it is queued. Wait until srpt_refresh_port_work()
2644          * has finished if it is currently running.
2645          */
2646         for (i = 0; i < sdev->device->phys_port_cnt; i++)
2647                 cancel_work_sync(&sdev->port[i].work);
2648
2649         scst_unregister(sdev->scst_tgt);
2650         sdev->scst_tgt = NULL;
2651
2652         ib_unregister_event_handler(&sdev->event_handler);
2653         ib_destroy_cm_id(sdev->cm_id);
2654         ib_destroy_srq(sdev->srq);
2655         ib_dereg_mr(sdev->mr);
2656         ib_dealloc_pd(sdev->pd);
2657 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2658         class_device_unregister(&sdev->class_dev);
2659 #else
2660         device_unregister(&sdev->dev);
2661 #endif
2662
2663         srpt_free_ioctx_ring(sdev);
2664         list_del(&sdev->list);
2665         kfree(sdev);
2666
2667         TRACE_EXIT();
2668 }
2669
2670 /**
2671  * Create procfs entries for srpt. Currently the only procfs entry created
2672  * by this function is the "trace_level" entry.
2673  */
2674 static int srpt_register_procfs_entry(struct scst_tgt_template *tgt)
2675 {
2676         int res = 0;
2677 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
2678         struct proc_dir_entry *p, *root;
2679
2680         root = scst_proc_get_tgt_root(tgt);
2681         WARN_ON(!root);
2682         if (root) {
2683                 /*
2684                  * Fill in the scst_proc_data::data pointer, which is used in
2685                  * a printk(KERN_INFO ...) statement in
2686                  * scst_proc_log_entry_write() in scst_proc.c.
2687                  */
2688                 srpt_log_proc_data.data = (char *)tgt->name;
2689                 p = scst_create_proc_entry(root, SRPT_PROC_TRACE_LEVEL_NAME,
2690                                            &srpt_log_proc_data);
2691                 if (!p)
2692                         res = -ENOMEM;
2693         } else
2694                 res = -ENOMEM;
2695
2696 #endif
2697         return res;
2698 }
2699
2700 static void srpt_unregister_procfs_entry(struct scst_tgt_template *tgt)
2701 {
2702 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
2703         struct proc_dir_entry *root;
2704
2705         root = scst_proc_get_tgt_root(tgt);
2706         WARN_ON(!root);
2707         if (root)
2708                 remove_proc_entry(SRPT_PROC_TRACE_LEVEL_NAME, root);
2709 #endif
2710 }
2711
2712 /*
2713  * Module initialization.
2714  *
2715  * Note: since ib_register_client() registers callback functions, and since at
2716  * least one of these callback functions (srpt_add_one()) calls SCST functions,
2717  * the SCST target template must be registered before ib_register_client() is
2718  * called.
2719  */
2720 static int __init srpt_init_module(void)
2721 {
2722         int ret;
2723
2724         INIT_LIST_HEAD(&srpt_devices);
2725
2726         ret = class_register(&srpt_class);
2727         if (ret) {
2728                 printk(KERN_ERR PFX "couldn't register class ib_srpt\n");
2729                 goto out;
2730         }
2731
2732         ret = scst_register_target_template(&srpt_template);
2733         if (ret < 0) {
2734                 printk(KERN_ERR PFX "couldn't register with scst\n");
2735                 ret = -ENODEV;
2736                 goto out_unregister_class;
2737         }
2738
2739         ret = srpt_register_procfs_entry(&srpt_template);
2740         if (ret) {
2741                 printk(KERN_ERR PFX "couldn't register procfs entry\n");
2742                 goto out_unregister_target;
2743         }
2744
2745         ret = ib_register_client(&srpt_client);
2746         if (ret) {
2747                 printk(KERN_ERR PFX "couldn't register IB client\n");
2748                 goto out_unregister_target;
2749         }
2750
2751         if (thread) {
2752                 spin_lock_init(&srpt_thread.thread_lock);
2753                 INIT_LIST_HEAD(&srpt_thread.thread_ioctx_list);
2754                 srpt_thread.thread = kthread_run(srpt_ioctx_thread,
2755                                                  NULL, "srpt_thread");
2756                 if (IS_ERR(srpt_thread.thread)) {
2757                         srpt_thread.thread = NULL;
2758                         thread = 0;
2759                 }
2760         }
2761
2762         return 0;
2763
2764 out_unregister_target:
2765         /*
2766          * Note: the procfs entry is unregistered in srpt_release(), which is
2767          * called by scst_unregister_target_template().
2768          */
2769         scst_unregister_target_template(&srpt_template);
2770 out_unregister_class:
2771         class_unregister(&srpt_class);
2772 out:
2773         return ret;
2774 }
2775
2776 static void __exit srpt_cleanup_module(void)
2777 {
2778         TRACE_ENTRY();
2779
2780         if (srpt_thread.thread)
2781                 kthread_stop(srpt_thread.thread);
2782         ib_unregister_client(&srpt_client);
2783         scst_unregister_target_template(&srpt_template);
2784         class_unregister(&srpt_class);
2785
2786         TRACE_EXIT();
2787 }
2788
2789 module_init(srpt_init_module);
2790 module_exit(srpt_cleanup_module);