srpt/src/ib_srpt.c (commit f7780a321beabf158a7da3cc0c4011b1d5b9799f)
1 /*
2  * Copyright (c) 2006 - 2009 Mellanox Technology Inc.  All rights reserved.
3  * Copyright (C) 2008 Vladislav Bolkhovitin <vst@vlnb.net>
4  * Copyright (C) 2008 - 2009 Bart Van Assche <bart.vanassche@gmail.com>
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  *
34  */
35
36 #include <linux/module.h>
37 #include <linux/init.h>
38 #include <linux/slab.h>
39 #include <linux/err.h>
40 #include <linux/ctype.h>
41 #include <linux/string.h>
42 #include <linux/kthread.h>
43 #include <asm/atomic.h>
44 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
45 #include <linux/proc_fs.h>
46 #include <linux/seq_file.h>
47 #endif
48 #include "ib_srpt.h"
49 #include "scst_debug.h"
50
51 /* Name of this kernel module. */
52 #define DRV_NAME                "ib_srpt"
53 /* Prefix for printk() kernel messages. */
54 #define PFX                     DRV_NAME ": "
55 #define DRV_VERSION             "1.0.1"
56 #define DRV_RELDATE             "July 10, 2008"
57 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
58 /* Flags to be used in SCST debug tracing statements. */
59 #define DEFAULT_SRPT_TRACE_FLAGS (TRACE_OUT_OF_MEM | TRACE_MINOR \
60                                   | TRACE_MGMT | TRACE_SPECIAL)
61 /* Name of the entry that will be created under /proc/scsi_tgt/ib_srpt. */
62 #define SRPT_PROC_TRACE_LEVEL_NAME      "trace_level"
63 #endif
64
65 #define MELLANOX_SRPT_ID_STRING "Mellanox OFED SRP target"
66
67 MODULE_AUTHOR("Vu Pham");
68 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol target "
69                    "v" DRV_VERSION " (" DRV_RELDATE ")");
70 MODULE_LICENSE("Dual BSD/GPL");
71
72 struct srpt_thread {
73         /* Protects thread_ioctx_list. */
74         spinlock_t thread_lock;
75         /* I/O contexts to be processed by the kernel thread. */
76         struct list_head thread_ioctx_list;
77         /* SRPT kernel thread. */
78         struct task_struct *thread;
79 };
80
81 /*
82  * Global Variables
83  */
84
85 static u64 srpt_service_guid;
86 /* Number of registered srpt_device structures. */
87 static atomic_t srpt_device_count;
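/* Module parameter: if non-zero, I/O contexts are processed by the SRPT kernel thread. */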
88 static int thread;
89 static struct srpt_thread srpt_thread;
90 static DECLARE_WAIT_QUEUE_HEAD(ioctx_list_waitQ);
91 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
92 static unsigned long trace_flag = DEFAULT_SRPT_TRACE_FLAGS;
93 module_param(trace_flag, long, 0644);
94 MODULE_PARM_DESC(trace_flag,
95                  "Trace flags for the ib_srpt kernel module.");
96 #endif
97
98 module_param(thread, int, 0444);
99 MODULE_PARM_DESC(thread,
100                  "Process I/O contexts in kernel thread context if 1; in soft IRQ "
101                  "context, where possible, if 0 (default).");
102
103 static void srpt_add_one(struct ib_device *device);
104 static void srpt_remove_one(struct ib_device *device);
105 static void srpt_unregister_mad_agent(struct srpt_device *sdev);
106 static void srpt_unregister_procfs_entry(struct scst_tgt_template *tgt);
107
108 static struct ib_client srpt_client = {
109         .name = DRV_NAME,
110         .add = srpt_add_one,
111         .remove = srpt_remove_one
112 };
113
114 /**
115  * Atomically test and set the channel state.
116  * @ch: RDMA channel.
117  * @old: channel state to compare with.
118  * @new: state to change the channel state to if the current state matches the
119  *       argument 'old'.
120  *
121  * Returns true if the channel state matched old upon entry of this function,
122  * and false otherwise.
123  */
124 static bool srpt_test_and_set_channel_state(struct srpt_rdma_ch *ch,
125                                             enum rdma_ch_state old,
126                                             enum rdma_ch_state new)
127 {
128         unsigned long flags;
129         enum rdma_ch_state cur;
130
131         spin_lock_irqsave(&ch->spinlock, flags);
132         cur = ch->state;
133         if (cur == old)
134                 ch->state = new;
135         spin_unlock_irqrestore(&ch->spinlock, flags);
136
137         return cur == old;
138 }
139
140 /*
141  * Callback function called by the InfiniBand core when an asynchronous IB
142  * event occurs. This callback may occur in interrupt context. See also
143  * section 11.5.2, Set Asynchronous Event Handler in the InfiniBand
144  * Architecture Specification.
145  */
146 static void srpt_event_handler(struct ib_event_handler *handler,
147                                struct ib_event *event)
148 {
149         struct srpt_device *sdev;
150         struct srpt_port *sport;
151
152         sdev = ib_get_client_data(event->device, &srpt_client);
153         if (!sdev || sdev->device != event->device)
154                 return;
155
156         TRACE_DBG("ASYNC event= %d on device= %s",
157                   event->event, sdev->device->name);
158
159         switch (event->event) {
160         case IB_EVENT_PORT_ERR:
161                 if (event->element.port_num <= sdev->device->phys_port_cnt) {
162                         sport = &sdev->port[event->element.port_num - 1];
163                         sport->lid = 0;
164                         sport->sm_lid = 0;
165                 }
166                 break;
167         case IB_EVENT_PORT_ACTIVE:
168         case IB_EVENT_LID_CHANGE:
169         case IB_EVENT_PKEY_CHANGE:
170         case IB_EVENT_SM_CHANGE:
171         case IB_EVENT_CLIENT_REREGISTER:
172                 /*
173                  * Refresh port data asynchronously. Note: it is safe to call
174                  * schedule_work() even if &sport->work is already on the
175                  * global workqueue because schedule_work() tests for the
176                  * work_pending() condition before adding &sport->work to the
177                  * global work queue.
178                  */
179                 if (event->element.port_num <= sdev->device->phys_port_cnt) {
180                         sport = &sdev->port[event->element.port_num - 1];
181                         if (!sport->lid && !sport->sm_lid)
182                                 schedule_work(&sport->work);
183                 }
184                 break;
185         default:
186                 break;
187         }
188
189 }
190
191 /*
192  * Callback function called by the InfiniBand core for SRQ (shared receive
193  * queue) events.
194  */
195 static void srpt_srq_event(struct ib_event *event, void *ctx)
196 {
197         TRACE_DBG("SRQ event %d", event->event);
198 }
199
200 /*
201  * Callback function called by the InfiniBand core for QP (queue pair) events.
202  */
203 static void srpt_qp_event(struct ib_event *event, void *ctx)
204 {
205         struct srpt_rdma_ch *ch = ctx;
206
207         TRACE_DBG("QP event %d on cm_id=%p sess_name=%s state=%d",
208                   event->event, ch->cm_id, ch->sess_name, ch->state);
209
210         switch (event->event) {
211         case IB_EVENT_COMM_EST:
212 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20) || defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
213                 ib_cm_notify(ch->cm_id, event->event);
214 #else
215                 /* Vanilla 2.6.19 kernel (or before) without OFED. */
216                 printk(KERN_ERR PFX "ib_cm_notify() is not available on"
217                         " vanilla kernels older than 2.6.20\n");
218 #endif
219                 break;
220         case IB_EVENT_QP_LAST_WQE_REACHED:
221                 if (srpt_test_and_set_channel_state(ch, RDMA_CHANNEL_LIVE,
222                                         RDMA_CHANNEL_DISCONNECTING)) {
223                         TRACE_DBG("%s", "Disconnecting channel.");
224                         ib_send_cm_dreq(ch->cm_id, NULL, 0);
225                 }
226                 break;
227         default:
228                 break;
229         }
230 }
231
232 /*
233  * Helper function for filling in an InfiniBand IOUnitInfo structure: copies
234  * the lowest four bits of 'value' into element 'slot' of the array of
235  * four-bit elements c_list (controller list). The slot index is one-based.
236  *
237  * @pre 1 <= slot && 0 <= value && value < 16
238  */
239 static void srpt_set_ioc(u8 *c_list, u32 slot, u8 value)
240 {
241         u16 id;
242         u8 tmp;
243
244         id = (slot - 1) / 2;
245         if (slot & 0x1) {
246                 tmp = c_list[id] & 0xf;
247                 c_list[id] = (value << 4) | tmp;
248         } else {
249                 tmp = c_list[id] & 0xf0;
250                 c_list[id] = (value & 0xf) | tmp;
251         }
252 }
253
254 /*
255  * Write InfiniBand ClassPortInfo to mad. See also section 16.3.3.1
256  * ClassPortInfo in the InfiniBand Architecture Specification.
257  */
258 static void srpt_get_class_port_info(struct ib_dm_mad *mad)
259 {
260         struct ib_class_port_info *cif;
261
262         cif = (struct ib_class_port_info *)mad->data;
263         memset(cif, 0, sizeof *cif);
264         cif->base_version = 1;
265         cif->class_version = 1;
266         cif->resp_time_value = 20;
267
268         mad->mad_hdr.status = 0;
269 }
270
271 /*
272  * Write IOUnitInfo to mad. See also section 16.3.3.3 IOUnitInfo in the
273  * InfiniBand Architecture Specification. See also section B.7,
274  * table B.6 in the T10 SRP r16a document.
275  */
276 static void srpt_get_iou(struct ib_dm_mad *mad)
277 {
278         struct ib_dm_iou_info *ioui;
279         u8 slot;
280         int i;
281
282         ioui = (struct ib_dm_iou_info *)mad->data;
283         ioui->change_id = 1;
284         ioui->max_controllers = 16;
285
286         /* set present for slot 1 and empty for the rest */
287         srpt_set_ioc(ioui->controller_list, 1, 1);
288         for (i = 1, slot = 2; i < 16; i++, slot++)
289                 srpt_set_ioc(ioui->controller_list, slot, 0);
290
291         mad->mad_hdr.status = 0;
292 }
293
294 /*
295  * Write IOControllerProfile to mad for I/O controller (sdev, slot). See also
296  * section 16.3.3.4 IOControllerProfile in the InfiniBand Architecture
297  * Specification. See also section B.7, table B.7 in the T10 SRP r16a
298  * document.
299  */
300 static void srpt_get_ioc(struct srpt_device *sdev, u32 slot,
301                          struct ib_dm_mad *mad)
302 {
303         struct ib_dm_ioc_profile *iocp;
304
305         iocp = (struct ib_dm_ioc_profile *)mad->data;
306
307         if (!slot || slot > 16) {
308                 mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
309                 return;
310         }
311
312         if (slot > 2) {
313                 mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
314                 return;
315         }
316
317         memset(iocp, 0, sizeof *iocp);
318         strcpy(iocp->id_string, MELLANOX_SRPT_ID_STRING);
319         iocp->guid = cpu_to_be64(srpt_service_guid);
320         iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
321         iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id);
322         iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver);
323         iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
324         iocp->subsys_device_id = 0x0;
325         iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
326         iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
327         iocp->protocol = cpu_to_be16(SRP_PROTOCOL);
328         iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION);
329         iocp->send_queue_depth = cpu_to_be16(SRPT_SRQ_SIZE);
330         iocp->rdma_read_depth = 4;
331         iocp->send_size = cpu_to_be32(MAX_MESSAGE_SIZE);
332         iocp->rdma_size = cpu_to_be32(MAX_RDMA_SIZE);
333         iocp->num_svc_entries = 1;
334         iocp->op_cap_mask = SRP_SEND_TO_IOC | SRP_SEND_FROM_IOC |
335                 SRP_RDMA_READ_FROM_IOC | SRP_RDMA_WRITE_FROM_IOC;
336
337         mad->mad_hdr.status = 0;
338 }
339
340 /*
341  * Device management: write ServiceEntries to mad for the given slot. See also
342  * section 16.3.3.5 ServiceEntries in the InfiniBand Architecture
343  * Specification. See also section B.7, table B.8 in the T10 SRP r16a document.
344  */
345 static void srpt_get_svc_entries(u64 ioc_guid,
346                                  u16 slot, u8 hi, u8 lo, struct ib_dm_mad *mad)
347 {
348         struct ib_dm_svc_entries *svc_entries;
349
350         WARN_ON(!ioc_guid);
351
352         if (!slot || slot > 16) {
353                 mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
354                 return;
355         }
356
357         if (slot > 2 || lo > hi || hi > 1) {
358                 mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
359                 return;
360         }
361
362         svc_entries = (struct ib_dm_svc_entries *)mad->data;
363         memset(svc_entries, 0, sizeof *svc_entries);
364         svc_entries->service_entries[0].id = cpu_to_be64(ioc_guid);
365         snprintf(svc_entries->service_entries[0].name,
366                  sizeof(svc_entries->service_entries[0].name),
367                  "%s%016llx",
368                  SRP_SERVICE_NAME_PREFIX,
369                  (unsigned long long)ioc_guid);
370
371         mad->mad_hdr.status = 0;
372 }
373
374 /*
375  * Process the MAD *rq_mad that was received through source port *sp
376  * (MAD = InfiniBand management datagram). The response to be sent back is
377  * written to *rsp_mad.
378  */
379 static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
380                                  struct ib_dm_mad *rsp_mad)
381 {
382         u16 attr_id;
383         u32 slot;
384         u8 hi, lo;
385
386         attr_id = be16_to_cpu(rq_mad->mad_hdr.attr_id);
387         switch (attr_id) {
388         case DM_ATTR_CLASS_PORT_INFO:
389                 srpt_get_class_port_info(rsp_mad);
390                 break;
391         case DM_ATTR_IOU_INFO:
392                 srpt_get_iou(rsp_mad);
393                 break;
394         case DM_ATTR_IOC_PROFILE:
395                 slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
396                 srpt_get_ioc(sp->sdev, slot, rsp_mad);
397                 break;
398         case DM_ATTR_SVC_ENTRIES:
399                 slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
400                 hi = (u8) ((slot >> 8) & 0xff);
401                 lo = (u8) (slot & 0xff);
402                 slot = (u16) ((slot >> 16) & 0xffff);
403                 srpt_get_svc_entries(srpt_service_guid,
404                                      slot, hi, lo, rsp_mad);
405                 break;
406         default:
407                 rsp_mad->mad_hdr.status =
408                     cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
409                 break;
410         }
411 }
412
413 /*
414  * Callback function that is called by the InfiniBand core after transmission of
415  * a MAD. (MAD = management datagram; AH = address handle.)
416  */
417 static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
418                                   struct ib_mad_send_wc *mad_wc)
419 {
420         ib_destroy_ah(mad_wc->send_buf->ah);
421         ib_free_send_mad(mad_wc->send_buf);
422 }
423
424 /*
425  * Callback function that is called by the InfiniBand core after reception of
426  * a MAD (management datagram).
427  */
428 static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
429                                   struct ib_mad_recv_wc *mad_wc)
430 {
431         struct srpt_port *sport = (struct srpt_port *)mad_agent->context;
432         struct ib_ah *ah;
433         struct ib_mad_send_buf *rsp;
434         struct ib_dm_mad *dm_mad;
435
436         if (!mad_wc || !mad_wc->recv_buf.mad)
437                 return;
438
439         ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc,
440                                   mad_wc->recv_buf.grh, mad_agent->port_num);
441         if (IS_ERR(ah))
442                 goto err;
443
444         BUILD_BUG_ON(offsetof(struct ib_dm_mad, data) != IB_MGMT_DEVICE_HDR);
445
446         rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp,
447                                  mad_wc->wc->pkey_index, 0,
448                                  IB_MGMT_DEVICE_HDR, IB_MGMT_DEVICE_DATA,
449                                  GFP_KERNEL);
450         if (IS_ERR(rsp))
451                 goto err_rsp;
452
453         rsp->ah = ah;
454
455         dm_mad = rsp->mad;
456         memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof *dm_mad);
457         dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
458         dm_mad->mad_hdr.status = 0;
459
460         switch (mad_wc->recv_buf.mad->mad_hdr.method) {
461         case IB_MGMT_METHOD_GET:
462                 srpt_mgmt_method_get(sport, mad_wc->recv_buf.mad, dm_mad);
463                 break;
464         case IB_MGMT_METHOD_SET:
465                 dm_mad->mad_hdr.status =
466                     cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
467                 break;
468         default:
469                 dm_mad->mad_hdr.status =
470                     cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD);
471                 break;
472         }
473
474         if (!ib_post_send_mad(rsp, NULL)) {
475                 ib_free_recv_mad(mad_wc);
476                 /* will destroy_ah & free_send_mad in send completion */
477                 return;
478         }
479
480         ib_free_send_mad(rsp);
481
482 err_rsp:
483         ib_destroy_ah(ah);
484 err:
485         ib_free_recv_mad(mad_wc);
486 }
487
488 /*
489  * Enable InfiniBand management datagram processing, update the cached sm_lid,
490  * lid and gid values, and register a callback function for processing MADs
491  * on the specified port. It is safe to call this function more than once for
492  * the same port.
493  */
494 static int srpt_refresh_port(struct srpt_port *sport)
495 {
496         struct ib_mad_reg_req reg_req;
497         struct ib_port_modify port_modify;
498         struct ib_port_attr port_attr;
499         int ret;
500
501         memset(&port_modify, 0, sizeof port_modify);
502         port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
503         port_modify.clr_port_cap_mask = 0;
504
505         ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
506         if (ret)
507                 goto err_mod_port;
508
509         ret = ib_query_port(sport->sdev->device, sport->port, &port_attr);
510         if (ret)
511                 goto err_query_port;
512
513         sport->sm_lid = port_attr.sm_lid;
514         sport->lid = port_attr.lid;
515
516         ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid);
517         if (ret)
518                 goto err_query_port;
519
520         if (!sport->mad_agent) {
521                 memset(&reg_req, 0, sizeof reg_req);
522                 reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
523                 reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION;
524                 set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
525                 set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);
526
527                 sport->mad_agent = ib_register_mad_agent(sport->sdev->device,
528                                                          sport->port,
529                                                          IB_QPT_GSI,
530                                                          &reg_req, 0,
531                                                          srpt_mad_send_handler,
532                                                          srpt_mad_recv_handler,
533                                                          sport);
534                 if (IS_ERR(sport->mad_agent)) {
535                         ret = PTR_ERR(sport->mad_agent);
536                         sport->mad_agent = NULL;
537                         goto err_query_port;
538                 }
539         }
540
541         return 0;
542
543 err_query_port:
544
545         port_modify.set_port_cap_mask = 0;
546         port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
547         ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
548
549 err_mod_port:
550
551         return ret;
552 }
553
554 /*
555  * Unregister the callback function for processing MADs and disable MAD
556  * processing for all ports of the specified device. It is safe to call this
557  * function more than once for the same device.
558  */
559 static void srpt_unregister_mad_agent(struct srpt_device *sdev)
560 {
561         struct ib_port_modify port_modify = {
562                 .clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP,
563         };
564         struct srpt_port *sport;
565         int i;
566
567         for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
568                 sport = &sdev->port[i - 1];
569                 WARN_ON(sport->port != i);
570                 if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0)
571                         printk(KERN_ERR PFX "disabling MAD processing"
572                                " failed.\n");
573                 if (sport->mad_agent) {
574                         ib_unregister_mad_agent(sport->mad_agent);
575                         sport->mad_agent = NULL;
576                 }
577         }
578 }
579
580 /*
581  * Allocate and initialize an SRPT I/O context structure.
582  */
583 static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev)
584 {
585         struct srpt_ioctx *ioctx;
586
587         ioctx = kmalloc(sizeof *ioctx, GFP_KERNEL);
588         if (!ioctx)
589                 goto out;
590
591         ioctx->buf = kzalloc(MAX_MESSAGE_SIZE, GFP_KERNEL);
592         if (!ioctx->buf)
593                 goto out_free_ioctx;
594
595         ioctx->dma = dma_map_single(sdev->device->dma_device, ioctx->buf,
596                                     MAX_MESSAGE_SIZE, DMA_BIDIRECTIONAL);
597 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)
598         if (dma_mapping_error(sdev->device->dma_device, ioctx->dma))
599 #else
600         if (dma_mapping_error(ioctx->dma))
601 #endif
602                 goto out_free_buf;
603
604         return ioctx;
605
606 out_free_buf:
607         kfree(ioctx->buf);
608 out_free_ioctx:
609         kfree(ioctx);
610 out:
611         return NULL;
612 }
613
614 /*
615  * Deallocate an SRPT I/O context structure.
616  */
617 static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
618 {
619         if (!ioctx)
620                 return;
621
622         dma_unmap_single(sdev->device->dma_device, ioctx->dma,
623                          MAX_MESSAGE_SIZE, DMA_BIDIRECTIONAL);
624         kfree(ioctx->buf);
625         kfree(ioctx);
626 }
627
628 /*
629  * Associate a ring of SRPT I/O context structures with the specified device.
630  */
631 static int srpt_alloc_ioctx_ring(struct srpt_device *sdev)
632 {
633         int i;
634
635         for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
636                 sdev->ioctx_ring[i] = srpt_alloc_ioctx(sdev);
637
638                 if (!sdev->ioctx_ring[i])
639                         goto err;
640
641                 sdev->ioctx_ring[i]->index = i;
642         }
643
644         return 0;
645
646 err:
647         while (--i >= 0) {
648                 srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
649                 sdev->ioctx_ring[i] = NULL;
650         }
651         return -ENOMEM;
652 }
653
654 /* Free the ring of SRPT I/O context structures. */
655 static void srpt_free_ioctx_ring(struct srpt_device *sdev)
656 {
657         int i;
658
659         for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
660                 srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
661                 sdev->ioctx_ring[i] = NULL;
662         }
663 }
664
665 /*
666  * Post a receive request on the SRQ of InfiniBand device 'sdev'.
667  */
668 static int srpt_post_recv(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
669 {
670         struct ib_sge list;
671         struct ib_recv_wr wr, *bad_wr;
672
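        /* Tag the work request ID so that completions for receives can be
         * distinguished from those for sends. */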
673         wr.wr_id = ioctx->index | SRPT_OP_RECV;
674
675         list.addr = ioctx->dma;
676         list.length = MAX_MESSAGE_SIZE;
677         list.lkey = sdev->mr->lkey;
678
679         wr.next = NULL;
680         wr.sg_list = &list;
681         wr.num_sge = 1;
682
683         return ib_post_srq_recv(sdev->srq, &wr, &bad_wr);
684 }
685
686 /*
687  * Post an IB send request.
688  * @ch: RDMA channel to post the send request on.
689  * @ioctx: I/O context of the send request.
690  * @len: length of the request to be sent in bytes.
691  *
692  * Returns zero upon success and a non-zero value upon failure.
693  */
694 static int srpt_post_send(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
695                           int len)
696 {
697         struct ib_sge list;
698         struct ib_send_wr wr, *bad_wr;
699         struct srpt_device *sdev = ch->sport->sdev;
700
701         dma_sync_single_for_device(sdev->device->dma_device, ioctx->dma,
702                                    MAX_MESSAGE_SIZE, DMA_TO_DEVICE);
703
704         list.addr = ioctx->dma;
705         list.length = len;
706         list.lkey = sdev->mr->lkey;
707
708         wr.next = NULL;
709         wr.wr_id = ioctx->index;
710         wr.sg_list = &list;
711         wr.num_sge = 1;
712         wr.opcode = IB_WR_SEND;
713         wr.send_flags = IB_SEND_SIGNALED;
714
715         return ib_post_send(ch->qp, &wr, &bad_wr);
716 }
717
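/*
 * Parse the data buffer descriptors of SRP_CMD information unit *srp_cmd and
 * store the result in *ioctx. *ind is set to one if the command contains an
 * indirect descriptor table with more entries than announced in the data-out
 * and data-in descriptor counts; such commands are rejected by the caller.
 */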
718 static int srpt_get_desc_tbl(struct srpt_ioctx *ioctx, struct srp_cmd *srp_cmd,
719                              int *ind)
720 {
721         struct srp_indirect_buf *idb;
722         struct srp_direct_buf *db;
723
724         *ind = 0;
725         if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) ||
726             ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) {
727                 ioctx->n_rbuf = 1;
728                 ioctx->rbufs = &ioctx->single_rbuf;
729
730                 db = (void *)srp_cmd->add_data;
731                 memcpy(ioctx->rbufs, db, sizeof *db);
732                 ioctx->data_len = be32_to_cpu(db->len);
733         } else {
734                 idb = (void *)srp_cmd->add_data;
735
736                 ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof *db;
737
738                 if (ioctx->n_rbuf >
739                     (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
740                         *ind = 1;
741                         ioctx->n_rbuf = 0;
742                         goto out;
743                 }
744
745                 if (ioctx->n_rbuf == 1)
746                         ioctx->rbufs = &ioctx->single_rbuf;
747                 else
748                         ioctx->rbufs =
749                                 kmalloc(ioctx->n_rbuf * sizeof *db, GFP_ATOMIC);
750                 if (!ioctx->rbufs) {
751                         ioctx->n_rbuf = 0;
752                         return -ENOMEM;
753                 }
754
755                 db = idb->desc_list;
756                 memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof *db);
757                 ioctx->data_len = be32_to_cpu(idb->len);
758         }
759 out:
760         return 0;
761 }
762
763 /*
764  * Modify the attributes of queue pair 'qp': allow local write, remote read,
765  * and remote write. Also transition 'qp' to state IB_QPS_INIT.
766  */
767 static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
768 {
769         struct ib_qp_attr *attr;
770         int ret;
771
772         attr = kzalloc(sizeof *attr, GFP_KERNEL);
773         if (!attr)
774                 return -ENOMEM;
775
776         attr->qp_state = IB_QPS_INIT;
777         attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
778             IB_ACCESS_REMOTE_WRITE;
779         attr->port_num = ch->sport->port;
780         attr->pkey_index = 0;
781
782         ret = ib_modify_qp(qp, attr,
783                            IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PORT |
784                            IB_QP_PKEY_INDEX);
785
786         kfree(attr);
787         return ret;
788 }
789
790 /**
791  * Change the state of a channel to 'ready to receive' (RTR).
792  * @ch: channel of the queue pair.
793  * @qp: queue pair to change the state of.
794  *
795  * Returns zero upon success and a negative value upon failure.
796  *
797  * Note: currently a struct ib_qp_attr takes 136 bytes on a 64-bit system.
798  * If this structure ever becomes larger, it might be necessary to allocate
799  * it dynamically instead of on the stack.
800  */
801 static int srpt_ch_qp_rtr(struct srpt_rdma_ch *ch, struct ib_qp *qp)
802 {
803         struct ib_qp_attr qp_attr;
804         int attr_mask;
805         int ret;
806
807         qp_attr.qp_state = IB_QPS_RTR;
808         ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask);
809         if (ret)
810                 goto out;
811
812         qp_attr.max_dest_rd_atomic = 4;
813
814         ret = ib_modify_qp(qp, &qp_attr, attr_mask);
815
816 out:
817         return ret;
818 }
819
820 /**
821  * Change the state of a channel to 'ready to send' (RTS).
822  * @ch: channel of the queue pair.
823  * @qp: queue pair to change the state of.
824  *
825  * Returns zero upon success and a negative value upon failure.
826  *
827  * Note: currently a struct ib_qp_attr takes 136 bytes on a 64-bit system.
828  * If this structure ever becomes larger, it might be necessary to allocate
829  * it dynamically instead of on the stack.
830  */
831 static int srpt_ch_qp_rts(struct srpt_rdma_ch *ch, struct ib_qp *qp)
832 {
833         struct ib_qp_attr qp_attr;
834         int attr_mask;
835         int ret;
836
837         qp_attr.qp_state = IB_QPS_RTS;
838         ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask);
839         if (ret)
840                 goto out;
841
842         qp_attr.max_rd_atomic = 4;
843
844         ret = ib_modify_qp(qp, &qp_attr, attr_mask);
845
846 out:
847         return ret;
848 }
849
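/*
 * Release the resources associated with I/O context *ioctx and post it again
 * on the SRQ so that it can be reused for receiving a new information unit.
 */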
850 static void srpt_reset_ioctx(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
851 {
852         int i;
853
854         if (ioctx->n_rdma_ius > 0 && ioctx->rdma_ius) {
855                 struct rdma_iu *riu = ioctx->rdma_ius;
856
857                 for (i = 0; i < ioctx->n_rdma_ius; ++i, ++riu)
858                         kfree(riu->sge);
859                 kfree(ioctx->rdma_ius);
860         }
861
862         if (ioctx->n_rbuf > 1)
863                 kfree(ioctx->rbufs);
864
865         if (srpt_post_recv(ch->sport->sdev, ioctx))
866                 /* We should queue the ioctx back onto a free list here. */
867                 printk(KERN_ERR PFX "SRQ post_recv failed - this is serious\n");
868         else
869                 atomic_inc(&ch->req_lim_delta);
870 }
871
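/*
 * Abort the SCST command *scmnd: unmap its data buffers and, depending on the
 * state of the associated I/O context, either tell SCST that receiving the
 * data failed or mark delivery of the response as failed and finish the
 * command.
 */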
872 static void srpt_abort_scst_cmd(struct srpt_device *sdev,
873                                 struct scst_cmd *scmnd,
874                                 bool tell_initiator)
875 {
876         struct srpt_ioctx *ioctx;
877         scst_data_direction dir;
878
879         ioctx = scst_cmd_get_tgt_priv(scmnd);
880         BUG_ON(!ioctx);
881         dir = scst_cmd_get_data_direction(scmnd);
882         if (dir != SCST_DATA_NONE) {
883                 dma_unmap_sg(sdev->device->dma_device,
884                              scst_cmd_get_sg(scmnd),
885                              scst_cmd_get_sg_cnt(scmnd),
886                              scst_to_tgt_dma_dir(dir));
887
888 #if 1
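                /* Check that the SRPT I/O context state is consistent with
                 * the SCST command state. */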
889                 switch (scmnd->state) {
890                 case SCST_CMD_STATE_DATA_WAIT:
891                         WARN_ON(ioctx->state != SRPT_STATE_NEED_DATA);
892                         break;
893                 case SCST_CMD_STATE_XMIT_WAIT:
894                         WARN_ON(ioctx->state != SRPT_STATE_PROCESSED);
895                         break;
896                 default:
897                         WARN_ON(ioctx->state == SRPT_STATE_NEED_DATA ||
898                                 ioctx->state == SRPT_STATE_PROCESSED);
899                 }
900 #endif
901
902                 if (ioctx->state == SRPT_STATE_NEED_DATA) {
903                         scst_rx_data(scmnd,
904                                      tell_initiator ? SCST_RX_STATUS_ERROR
905                                      : SCST_RX_STATUS_ERROR_FATAL,
906                                      SCST_CONTEXT_THREAD);
907                         goto out;
908                 } else if (ioctx->state == SRPT_STATE_PROCESSED)
909                         ;
910                 else {
911                         printk(KERN_ERR PFX
912                                "unexpected cmd state %d (SCST) %d (SRPT)\n",
913                                scmnd->state, ioctx->state);
914                         WARN_ON("unexpected cmd state");
915                 }
916         }
917
918         scst_set_delivery_status(scmnd, SCST_CMD_DELIVERY_FAILED);
919         scst_tgt_cmd_done(scmnd, scst_estimate_context());
920 out:
921         return;
922 }
923
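/*
 * Process a work completion that finished with an error status. A failed
 * receive completion means that the SRQ is in a bad state; for failed send
 * and RDMA completions the associated SCST command is aborted or, if no
 * command is associated, the I/O context is reposted on the SRQ.
 */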
924 static void srpt_handle_err_comp(struct srpt_rdma_ch *ch, struct ib_wc *wc)
925 {
926         struct srpt_ioctx *ioctx;
927         struct srpt_device *sdev = ch->sport->sdev;
928
929         if (wc->wr_id & SRPT_OP_RECV) {
930                 ioctx = sdev->ioctx_ring[wc->wr_id & ~SRPT_OP_RECV];
931                 printk(KERN_ERR PFX "This is serious - SRQ is in bad state\n");
932         } else {
933                 ioctx = sdev->ioctx_ring[wc->wr_id];
934
935                 if (ioctx->scmnd)
936                         srpt_abort_scst_cmd(sdev, ioctx->scmnd, true);
937                 else
938                         srpt_reset_ioctx(ch, ioctx);
939         }
940 }
941
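/*
 * Process a send completion: unmap the data buffers and tell SCST that
 * response delivery has finished, or repost the I/O context on the SRQ if no
 * SCST command is associated with it.
 */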
942 static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
943                                   struct srpt_ioctx *ioctx,
944                                   enum scst_exec_context context)
945 {
946         if (ioctx->scmnd) {
947                 scst_data_direction dir =
948                         scst_cmd_get_data_direction(ioctx->scmnd);
949
950                 if (dir != SCST_DATA_NONE)
951                         dma_unmap_sg(ch->sport->sdev->device->dma_device,
952                                      scst_cmd_get_sg(ioctx->scmnd),
953                                      scst_cmd_get_sg_cnt(ioctx->scmnd),
954                                      scst_to_tgt_dma_dir(dir));
955
956                 scst_tgt_cmd_done(ioctx->scmnd, context);
957         } else
958                 srpt_reset_ioctx(ch, ioctx);
959 }
960
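/*
 * Process an RDMA completion: for a write command, tell SCST that receiving
 * data from the initiator has finished. I/O contexts without an associated
 * SCST command are reposted on the SRQ.
 */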
961 static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
962                                   struct srpt_ioctx *ioctx)
963 {
964         if (!ioctx->scmnd) {
965                 srpt_reset_ioctx(ch, ioctx);
966                 return;
967         }
968
969         if (scst_cmd_get_data_direction(ioctx->scmnd) == SCST_DATA_WRITE)
970                 scst_rx_data(ioctx->scmnd, SCST_RX_STATUS_SUCCESS,
971                         scst_estimate_context());
972 }
973
974 /**
975  * Build an SRP_RSP response.
976  * @ch: RDMA channel through which the request has been received.
977  * @ioctx: I/O context in which the SRP_RSP response will be built.
978  * @s_key: sense key that will be stored in the response.
979  * @s_code: value that will be stored in the asc_ascq field of the sense data.
980  * @tag: tag of the request for which this response is being generated.
981  *
982  * Returns the size in bytes of the SRP_RSP response.
983  *
984  * An SRP_RSP response contains a SCSI status or service response. See also
985  * section 6.9 in the T10 SRP r16a document for the format of an SRP_RSP
986  * response. See also SPC-2 for more information about sense data.
987  */
988 static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
989                               struct srpt_ioctx *ioctx, u8 s_key, u8 s_code,
990                               u64 tag)
991 {
992         struct srp_rsp *srp_rsp;
993         struct sense_data *sense;
994         int limit_delta;
995         int sense_data_len = 0;
996
997         srp_rsp = ioctx->buf;
998         memset(srp_rsp, 0, sizeof *srp_rsp);
999
1000         limit_delta = atomic_read(&ch->req_lim_delta);
1001         atomic_sub(limit_delta, &ch->req_lim_delta);
1002
1003         srp_rsp->opcode = SRP_RSP;
1004         srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
1005         srp_rsp->tag = tag;
1006
1007         if (s_key != NO_SENSE) {
1008                 sense_data_len = sizeof *sense + (sizeof *sense % 4);
1009                 srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
1010                 srp_rsp->status = SAM_STAT_CHECK_CONDITION;
1011                 srp_rsp->sense_data_len = cpu_to_be32(sense_data_len);
1012
1013                 sense = (struct sense_data *)(srp_rsp + 1);
1014                 sense->err_code = 0x70;
1015                 sense->key = s_key;
1016                 sense->asc_ascq = s_code;
1017         }
1018
1019         return sizeof(*srp_rsp) + sense_data_len;
1020 }
1021
1022 /**
1023  * Build a task management response, which is a specific SRP_RSP response.
1024  * @ch: RDMA channel through which the request has been received.
1025  * @ioctx: I/O context in which the SRP_RSP response will be built.
1026  * @rsp_code: RSP_CODE that will be stored in the response.
1027  * @tag: tag of the request for which this response is being generated.
1028  *
1029  * Returns the size in bytes of the SRP_RSP response.
1030  *
1031  * An SRP_RSP response contains a SCSI status or service response. See also
1032  * section 6.9 in the T10 SRP r16a document for the format of an SRP_RSP
1033  * response.
1034  */
1035 static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
1036                                   struct srpt_ioctx *ioctx, u8 rsp_code,
1037                                   u64 tag)
1038 {
1039         struct srp_rsp *srp_rsp;
1040         int limit_delta;
1041         int resp_data_len = 0;
1042
1043         dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
1044                                 MAX_MESSAGE_SIZE, DMA_TO_DEVICE);
1045
1046         srp_rsp = ioctx->buf;
1047         memset(srp_rsp, 0, sizeof *srp_rsp);
1048
1049         limit_delta = atomic_read(&ch->req_lim_delta);
1050         atomic_sub(limit_delta, &ch->req_lim_delta);
1051
1052         srp_rsp->opcode = SRP_RSP;
1053         srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
1054         srp_rsp->tag = tag;
1055
1056         if (rsp_code != SRP_TSK_MGMT_SUCCESS) {
1057                 resp_data_len = 4;
1058                 srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID;
1059                 srp_rsp->resp_data_len = cpu_to_be32(resp_data_len);
1060                 srp_rsp->data[3] = rsp_code;
1061         }
1062
1063         return sizeof(*srp_rsp) + resp_data_len;
1064 }
1065
1066 /*
1067  * Process SRP_CMD.
1068  */
1069 static int srpt_handle_cmd(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
1070 {
1071         struct scst_cmd *scmnd;
1072         struct srp_cmd *srp_cmd;
1073         struct srp_rsp *srp_rsp;
1074         scst_data_direction dir = SCST_DATA_NONE;
1075         int indirect_desc = 0;
1076         int ret;
1077         unsigned long flags;
1078
1079         srp_cmd = ioctx->buf;
1080         srp_rsp = ioctx->buf;
1081
1082         if (srp_cmd->buf_fmt) {
1083                 ret = srpt_get_desc_tbl(ioctx, srp_cmd, &indirect_desc);
1084                 if (ret) {
1085                         srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
1086                                            NO_ADD_SENSE, srp_cmd->tag);
1087                         srp_rsp->status = SAM_STAT_TASK_SET_FULL;
1088                         goto err;
1089                 }
1090
1091                 if (indirect_desc) {
1092                         srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
1093                                            NO_ADD_SENSE, srp_cmd->tag);
1094                         srp_rsp->status = SAM_STAT_TASK_SET_FULL;
1095                         goto err;
1096                 }
1097
1098                 if (srp_cmd->buf_fmt & 0xf)
1099                         dir = SCST_DATA_READ;
1100                 else if (srp_cmd->buf_fmt >> 4)
1101                         dir = SCST_DATA_WRITE;
1102                 else
1103                         dir = SCST_DATA_NONE;
1104         } else
1105                 dir = SCST_DATA_NONE;
1106
1107         scmnd = scst_rx_cmd(ch->scst_sess, (u8 *) &srp_cmd->lun,
1108                             sizeof srp_cmd->lun, srp_cmd->cdb, 16,
1109                             thread ? SCST_NON_ATOMIC : SCST_ATOMIC);
1110         if (!scmnd) {
1111                 srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
1112                                    NO_ADD_SENSE, srp_cmd->tag);
1113                 srp_rsp->status = SAM_STAT_TASK_SET_FULL;
1114                 goto err;
1115         }
1116
1117         ioctx->scmnd = scmnd;
1118
1119         switch (srp_cmd->task_attr) {
1120         case SRP_CMD_HEAD_OF_Q:
1121                 scmnd->queue_type = SCST_CMD_QUEUE_HEAD_OF_QUEUE;
1122                 break;
1123         case SRP_CMD_ORDERED_Q:
1124                 scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
1125                 break;
1126         case SRP_CMD_SIMPLE_Q:
1127                 scmnd->queue_type = SCST_CMD_QUEUE_SIMPLE;
1128                 break;
1129         case SRP_CMD_ACA:
1130                 scmnd->queue_type = SCST_CMD_QUEUE_ACA;
1131                 break;
1132         default:
1133                 scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
1134                 break;
1135         }
1136
1137         scst_cmd_set_tag(scmnd, srp_cmd->tag);
1138         scst_cmd_set_tgt_priv(scmnd, ioctx);
1139         scst_cmd_set_expected(scmnd, dir, ioctx->data_len);
1140
1141         spin_lock_irqsave(&ch->spinlock, flags);
1142         list_add_tail(&ioctx->scmnd_list, &ch->active_scmnd_list);
1143         ch->active_scmnd_cnt++;
1144         spin_unlock_irqrestore(&ch->spinlock, flags);
1145
1146         scst_cmd_init_done(scmnd, scst_estimate_context());
1147
1148         return 0;
1149
1150 err:
1151         WARN_ON(srp_rsp->opcode != SRP_RSP);
1152
1153         return -1;
1154 }
1155
1156 /*
1157  * Process an SRP_TSK_MGMT request.
1158  *
1159  * Returns 0 upon success and -1 upon failure.
1160  *
1161  * Each task management function is performed by calling one of the
1162  * scst_rx_mgmt_fn*() functions. These functions will either report failure
1163  * or process the task management function asynchronously. The function
1164  * srpt_tsk_mgmt_done() will be called by the SCST core upon completion of the
1165  * task management function. When srpt_handle_tsk_mgmt() reports failure
1166  * (i.e. returns -1) a response will have been built in ioctx->buf. This
1167  * information unit has to be sent back by the caller.
1168  *
1169  * For more information about SRP_TSK_MGMT information units, see also section
1170  * 6.7 in the T10 SRP r16a document.
1171  */
1172 static int srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
1173                                 struct srpt_ioctx *ioctx)
1174 {
1175         struct srp_tsk_mgmt *srp_tsk;
1176         struct srpt_mgmt_ioctx *mgmt_ioctx;
1177         int ret;
1178
1179         srp_tsk = ioctx->buf;
1180
1181         TRACE_DBG("recv_tsk_mgmt= %d for task_tag= %lld"
1182                   " using tag= %lld cm_id= %p sess= %p",
1183                   srp_tsk->tsk_mgmt_func,
1184                   (unsigned long long) srp_tsk->task_tag,
1185                   (unsigned long long) srp_tsk->tag,
1186                   ch->cm_id, ch->scst_sess);
1187
1188         mgmt_ioctx = kmalloc(sizeof *mgmt_ioctx, GFP_ATOMIC);
1189         if (!mgmt_ioctx) {
1190                 srpt_build_tskmgmt_rsp(ch, ioctx, SRP_TSK_MGMT_FAILED,
1191                                        srp_tsk->tag);
1192                 goto err;
1193         }
1194
1195         mgmt_ioctx->ioctx = ioctx;
1196         mgmt_ioctx->ch = ch;
1197         mgmt_ioctx->tag = srp_tsk->tag;
1198
1199         switch (srp_tsk->tsk_mgmt_func) {
1200         case SRP_TSK_ABORT_TASK:
1201                 TRACE_DBG("%s", "Processing SRP_TSK_ABORT_TASK");
1202                 ret = scst_rx_mgmt_fn_tag(ch->scst_sess,
1203                                           SCST_ABORT_TASK,
1204                                           srp_tsk->task_tag,
1205                                           thread ?
1206                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1207                                           mgmt_ioctx);
1208                 break;
1209         case SRP_TSK_ABORT_TASK_SET:
1210                 TRACE_DBG("%s", "Processing SRP_TSK_ABORT_TASK_SET");
1211                 ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
1212                                           SCST_ABORT_TASK_SET,
1213                                           (u8 *) &srp_tsk->lun,
1214                                           sizeof srp_tsk->lun,
1215                                           thread ?
1216                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1217                                           mgmt_ioctx);
1218                 break;
1219         case SRP_TSK_CLEAR_TASK_SET:
1220                 TRACE_DBG("%s", "Processing SRP_TSK_CLEAR_TASK_SET");
1221                 ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
1222                                           SCST_CLEAR_TASK_SET,
1223                                           (u8 *) &srp_tsk->lun,
1224                                           sizeof srp_tsk->lun,
1225                                           thread ?
1226                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1227                                           mgmt_ioctx);
1228                 break;
1229         case SRP_TSK_LUN_RESET:
1230                 TRACE_DBG("%s", "Processing SRP_TSK_LUN_RESET");
1231                 ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
1232                                           SCST_LUN_RESET,
1233                                           (u8 *) &srp_tsk->lun,
1234                                           sizeof srp_tsk->lun,
1235                                           thread ?
1236                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1237                                           mgmt_ioctx);
1238                 break;
1239         case SRP_TSK_CLEAR_ACA:
1240                 TRACE_DBG("%s", "Processing SRP_TSK_CLEAR_ACA");
1241                 ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
1242                                           SCST_CLEAR_ACA,
1243                                           (u8 *) &srp_tsk->lun,
1244                                           sizeof srp_tsk->lun,
1245                                           thread ?
1246                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1247                                           mgmt_ioctx);
1248                 break;
1249         default:
1250                 TRACE_DBG("%s", "Unsupported task management function.");
1251                 srpt_build_tskmgmt_rsp(ch, ioctx,
1252                                        SRP_TSK_MGMT_FUNC_NOT_SUPP,
1253                                        srp_tsk->tag);
1254                 goto err;
1255         }
1256
1257         if (ret) {
1258                 TRACE_DBG("%s", "Processing task management function failed.");
1259                 srpt_build_tskmgmt_rsp(ch, ioctx, SRP_TSK_MGMT_FAILED,
1260                                        srp_tsk->tag);
1261                 goto err;
1262         }
1263
1264         WARN_ON(srp_tsk->opcode == SRP_RSP);
1265
1266         return 0;
1267
1268 err:
1269         WARN_ON(srp_tsk->opcode != SRP_RSP);
1270
1271         kfree(mgmt_ioctx);
1272         return -1;
1273 }
1274
1275 /**
1276  * Process a receive completion event.
1277  * @ch: RDMA channel for which the completion event has been received.
1278  * @ioctx: SRPT I/O context for which the completion event has been received.
1279  */
1280 static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
1281                                struct srpt_ioctx *ioctx)
1282 {
1283         struct srp_cmd *srp_cmd;
1284         struct srp_rsp *srp_rsp;
1285         unsigned long flags;
1286         int len;
1287
1288         spin_lock_irqsave(&ch->spinlock, flags);
1289         if (ch->state != RDMA_CHANNEL_LIVE) {
1290                 if (ch->state == RDMA_CHANNEL_CONNECTING) {
1291                         list_add_tail(&ioctx->wait_list, &ch->cmd_wait_list);
1292                         spin_unlock_irqrestore(&ch->spinlock, flags);
1293                         return;
1294                 } else {
1295                         spin_unlock_irqrestore(&ch->spinlock, flags);
1296                         srpt_reset_ioctx(ch, ioctx);
1297                         return;
1298                 }
1299         }
1300         spin_unlock_irqrestore(&ch->spinlock, flags);
1301
1302         dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
1303                                 MAX_MESSAGE_SIZE, DMA_FROM_DEVICE);
1304
1305         ioctx->data_len = 0;
1306         ioctx->n_rbuf = 0;
1307         ioctx->rbufs = NULL;
1308         ioctx->n_rdma = 0;
1309         ioctx->n_rdma_ius = 0;
1310         ioctx->rdma_ius = NULL;
1311         ioctx->scmnd = NULL;
1312         ioctx->state = SRPT_STATE_NEW;
1313
1314         srp_cmd = ioctx->buf;
1315         srp_rsp = ioctx->buf;
1316
1317         switch (srp_cmd->opcode) {
1318         case SRP_CMD:
1319                 if (srpt_handle_cmd(ch, ioctx) < 0)
1320                         goto err;
1321                 break;
1322
1323         case SRP_TSK_MGMT:
1324                 if (srpt_handle_tsk_mgmt(ch, ioctx) < 0)
1325                         goto err;
1326                 break;
1327
1328         case SRP_I_LOGOUT:
1329         case SRP_AER_REQ:
1330         default:
1331                 srpt_build_cmd_rsp(ch, ioctx, ILLEGAL_REQUEST, INVALID_CDB,
1332                                    srp_cmd->tag);
1333                 goto err;
1334         }
1335
1336         dma_sync_single_for_device(ch->sport->sdev->device->dma_device,
1337                                    ioctx->dma, MAX_MESSAGE_SIZE,
1338                                    DMA_FROM_DEVICE);
1339
1340         return;
1341
1342 err:
1343         WARN_ON(srp_rsp->opcode != SRP_RSP);
1344         len = (sizeof *srp_rsp) + be32_to_cpu(srp_rsp->sense_data_len);
1345
1346         if (ch->state != RDMA_CHANNEL_LIVE) {
1347                 /* Give up if another thread modified the channel state. */
1348                 printk(KERN_ERR PFX "%s: channel is in state %d\n",
1349                        __func__, ch->state);
1350                 srpt_reset_ioctx(ch, ioctx);
1351         } else if (srpt_post_send(ch, ioctx, len)) {
1352                 printk(KERN_ERR PFX "%s: sending SRP_RSP response failed\n",
1353                        __func__);
1354                 srpt_reset_ioctx(ch, ioctx);
1355         }
1356 }
1357
1358 /*
1359  * Returns true if the ioctx list is non-empty or if the ib_srpt kernel thread
1360  * should stop.
1361  * @pre thread != 0
1362  */
1363 static inline int srpt_test_ioctx_list(void)
1364 {
1365         int res = (!list_empty(&srpt_thread.thread_ioctx_list) ||
1366                    unlikely(kthread_should_stop()));
1367         return res;
1368 }
1369
1370 /*
1371  * Add 'ioctx' to the tail of the ioctx list and wake up the kernel thread.
1372  *
1373  * @pre thread != 0
1374  */
1375 static inline void srpt_schedule_thread(struct srpt_ioctx *ioctx)
1376 {
1377         unsigned long flags;
1378
1379         spin_lock_irqsave(&srpt_thread.thread_lock, flags);
1380         list_add_tail(&ioctx->comp_list, &srpt_thread.thread_ioctx_list);
1381         spin_unlock_irqrestore(&srpt_thread.thread_lock, flags);
1382         wake_up(&ioctx_list_waitQ);
1383 }
1384
1385 /**
1386  * InfiniBand completion queue callback function.
1387  * @cq: completion queue.
1388  * @ctx: completion queue context, which was passed as the fourth argument of
1389  *       the function ib_create_cq().
1390  */
1391 static void srpt_completion(struct ib_cq *cq, void *ctx)
1392 {
1393         struct srpt_rdma_ch *ch = ctx;
1394         struct srpt_device *sdev = ch->sport->sdev;
1395         struct ib_wc wc;
1396         struct srpt_ioctx *ioctx;
1397
1398         ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
1399         while (ib_poll_cq(ch->cq, 1, &wc) > 0) {
1400                 if (wc.status) {
1401                         printk(KERN_ERR PFX "failed %s status= %d\n",
1402                                wc.wr_id & SRPT_OP_RECV ? "receive" : "send",
1403                                wc.status);
1404                         srpt_handle_err_comp(ch, &wc);
1405                         break;
1406                 }
1407
1408                 if (wc.wr_id & SRPT_OP_RECV) {
1409                         ioctx = sdev->ioctx_ring[wc.wr_id & ~SRPT_OP_RECV];
1410                         if (thread) {
1411                                 ioctx->ch = ch;
1412                                 ioctx->op = IB_WC_RECV;
1413                                 srpt_schedule_thread(ioctx);
1414                         } else
1415                                 srpt_handle_new_iu(ch, ioctx);
1416                         continue;
1417                 } else
1418                         ioctx = sdev->ioctx_ring[wc.wr_id];
1419
1420                 if (thread) {
1421                         ioctx->ch = ch;
1422                         ioctx->op = wc.opcode;
1423                         srpt_schedule_thread(ioctx);
1424                 } else {
1425                         switch (wc.opcode) {
1426                         case IB_WC_SEND:
1427                                 srpt_handle_send_comp(ch, ioctx,
1428                                         scst_estimate_context());
1429                                 break;
1430                         case IB_WC_RDMA_WRITE:
1431                         case IB_WC_RDMA_READ:
1432                                 srpt_handle_rdma_comp(ch, ioctx);
1433                                 break;
1434                         default:
1435                                 break;
1436                         }
1437                 }
1438         }
1439 }
1440
1441 /*
1442  * Create a completion queue and a queue pair for the specified RDMA channel.
1443  */
1444 static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
1445 {
1446         struct ib_qp_init_attr *qp_init;
1447         struct srpt_device *sdev = ch->sport->sdev;
1448         int cqe;
1449         int ret;
1450
1451         qp_init = kzalloc(sizeof *qp_init, GFP_KERNEL);
1452         if (!qp_init)
1453                 return -ENOMEM;
1454
1455         /* Create a completion queue (CQ). */
1456
1457         cqe = SRPT_RQ_SIZE + SRPT_SQ_SIZE - 1;
1458 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(RHEL_RELEASE_CODE)
1459         ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe);
1460 #else
1461         ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe, 0);
1462 #endif
1463         if (IS_ERR(ch->cq)) {
1464                 ret = PTR_ERR(ch->cq);
1465                 printk(KERN_ERR PFX "failed to create_cq cqe= %d ret= %d\n",
1466                         cqe, ret);
1467                 goto out;
1468         }
1469
1470         /* Request completion notification. */
1471
1472         ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
1473
1474         /* Create a queue pair (QP). */
1475
1476         qp_init->qp_context = (void *)ch;
1477         qp_init->event_handler = srpt_qp_event;
1478         qp_init->send_cq = ch->cq;
1479         qp_init->recv_cq = ch->cq;
1480         qp_init->srq = sdev->srq;
1481         qp_init->sq_sig_type = IB_SIGNAL_REQ_WR;
1482         qp_init->qp_type = IB_QPT_RC;
1483         qp_init->cap.max_send_wr = SRPT_SQ_SIZE;
1484         qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE;
1485
1486         ch->qp = ib_create_qp(sdev->pd, qp_init);
1487         if (IS_ERR(ch->qp)) {
1488                 ret = PTR_ERR(ch->qp);
1489                 ib_destroy_cq(ch->cq);
1490                 printk(KERN_ERR PFX "failed to create_qp ret= %d\n", ret);
1491                 goto out;
1492         }
1493
1494         TRACE_DBG("%s: max_cqe= %d max_sge= %d cm_id= %p",
1495                __func__, ch->cq->cqe, qp_init->cap.max_send_sge,
1496                ch->cm_id);
1497
1498         /* Modify the attributes and the state of queue pair ch->qp. */
1499
1500         ret = srpt_init_ch_qp(ch, ch->qp);
1501         if (ret) {
1502                 ib_destroy_qp(ch->qp);
1503                 ib_destroy_cq(ch->cq);
1504                 goto out;
1505         }
1506
1507         atomic_set(&ch->req_lim_delta, SRPT_RQ_SIZE);
1508 out:
1509         kfree(qp_init);
1510         return ret;
1511 }
1512
1513 /**
1514  * Look up the RDMA channel that corresponds to the specified cm_id.
1515  *
1516  * Return NULL if no matching RDMA channel has been found.
1517  */
1518 static struct srpt_rdma_ch *srpt_find_channel(struct ib_cm_id *cm_id, bool del)
1519 {
1520         struct srpt_device *sdev = cm_id->context;
1521         struct srpt_rdma_ch *ch;
1522
1523         spin_lock_irq(&sdev->spinlock);
1524         list_for_each_entry(ch, &sdev->rch_list, list) {
1525                 if (ch->cm_id == cm_id) {
1526                         if (del)
1527                                 list_del(&ch->list);
1528                         spin_unlock_irq(&sdev->spinlock);
1529                         return ch;
1530                 }
1531         }
1532
1533         spin_unlock_irq(&sdev->spinlock);
1534
1535         return NULL;
1536 }
1537
1538 /**
1539  * Release all resources associated with the specified RDMA channel.
1540  *
1541  * Note: the caller must have removed the channel from the channel list
1542  * before calling this function.
1543  */
1544 static void srpt_release_channel(struct srpt_rdma_ch *ch, int destroy_cmid)
1545 {
1546         TRACE_ENTRY();
1547
1548         WARN_ON(srpt_find_channel(ch->cm_id, false) == ch);
1549
1550         if (ch->cm_id && destroy_cmid) {
1551                 TRACE_DBG("%s: destroy cm_id= %p", __func__, ch->cm_id);
1552                 ib_destroy_cm_id(ch->cm_id);
1553                 ch->cm_id = NULL;
1554         }
1555
1556         ib_destroy_qp(ch->qp);
1557         ib_destroy_cq(ch->cq);
1558
1559         if (ch->scst_sess) {
1560                 struct srpt_ioctx *ioctx, *ioctx_tmp;
1561
1562                 TRACE_DBG("%s: release sess= %p sess_name= %s active_cmd= %d",
1563                           __func__, ch->scst_sess, ch->sess_name,
1564                           ch->active_scmnd_cnt);
1565
1566                 spin_lock_irq(&ch->spinlock);
1567                 list_for_each_entry_safe(ioctx, ioctx_tmp,
1568                                          &ch->active_scmnd_list, scmnd_list) {
1569                         spin_unlock_irq(&ch->spinlock);
1570
1571                         if (ioctx->scmnd)
1572                                 srpt_abort_scst_cmd(ch->sport->sdev,
1573                                                     ioctx->scmnd, true);
1574
1575                         spin_lock_irq(&ch->spinlock);
1576                 }
1577                 WARN_ON(!list_empty(&ch->active_scmnd_list));
1578                 WARN_ON(ch->active_scmnd_cnt != 0);
1579                 spin_unlock_irq(&ch->spinlock);
1580
1581                 scst_unregister_session(ch->scst_sess, 0, NULL);
1582                 ch->scst_sess = NULL;
1583         }
1584
1585         kfree(ch);
1586
1587         TRACE_EXIT();
1588 }
1589
1590 static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
1591                             struct ib_cm_req_event_param *param,
1592                             void *private_data)
1593 {
1594         struct srpt_device *sdev = cm_id->context;
1595         struct srp_login_req *req;
1596         struct srp_login_rsp *rsp;
1597         struct srp_login_rej *rej;
1598         struct ib_cm_rep_param *rep_param;
1599         struct srpt_rdma_ch *ch, *tmp_ch;
1600         u32 it_iu_len;
1601         int ret = 0;
1602
1603         if (!sdev || !private_data)
1604                 return -EINVAL;
1605
1606         rsp = kzalloc(sizeof *rsp, GFP_KERNEL);
1607         rej = kzalloc(sizeof *rej, GFP_KERNEL);
1608         rep_param = kzalloc(sizeof *rep_param, GFP_KERNEL);
1609
1610         if (!rsp || !rej || !rep_param) {
1611                 ret = -ENOMEM;
1612                 goto out;
1613         }
1614
1615         req = (struct srp_login_req *)private_data;
1616
1617         it_iu_len = be32_to_cpu(req->req_it_iu_len);
1618
1619         TRACE_DBG("Host login i_port_id=0x%llx:0x%llx t_port_id=0x%llx:0x%llx"
1620             " it_iu_len=%d",
1621             (unsigned long long)be64_to_cpu(*(u64 *)&req->initiator_port_id[0]),
1622             (unsigned long long)be64_to_cpu(*(u64 *)&req->initiator_port_id[8]),
1623             (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[0]),
1624             (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[8]),
1625             it_iu_len);
1626
1627         if (it_iu_len > MAX_MESSAGE_SIZE || it_iu_len < 64) {
1628                 rej->reason =
1629                     cpu_to_be32(SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
1630                 ret = -EINVAL;
1631                 TRACE_DBG("Reject invalid it_iu_len=%d", it_iu_len);
1632                 goto reject;
1633         }
1634
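        /*
         * A MULTICHAN value of SINGLE in the login request asks the target to
         * terminate any existing channels for the same initiator/target port
         * ID pair before accepting the new channel, which the loop below does.
         */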
1635         if ((req->req_flags & 0x3) == SRP_MULTICHAN_SINGLE) {
1636                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;
1637
1638                 spin_lock_irq(&sdev->spinlock);
1639
1640                 list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
1641                         if (!memcmp(ch->i_port_id, req->initiator_port_id, 16)
1642                             && !memcmp(ch->t_port_id, req->target_port_id, 16)
1643                             && param->port == ch->sport->port
1644                             && param->listen_id == ch->sport->sdev->cm_id
1645                             && ch->cm_id) {
1646                                 enum rdma_ch_state prev_state;
1647
1648                                 /* found an existing channel */
1649                                 TRACE_DBG("Found existing channel name= %s"
1650                                           " cm_id= %p state= %d",
1651                                           ch->sess_name, ch->cm_id, ch->state);
1652
1653                                 prev_state = ch->state;
1654                                 if (ch->state == RDMA_CHANNEL_LIVE)
1655                                         ch->state = RDMA_CHANNEL_DISCONNECTING;
1656                                 else if (ch->state == RDMA_CHANNEL_CONNECTING)
1657                                         list_del(&ch->list);
1658
1659                                 spin_unlock_irq(&sdev->spinlock);
1660
1661                                 rsp->rsp_flags =
1662                                         SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
1663
1664                                 if (prev_state == RDMA_CHANNEL_LIVE)
1665                                         ib_send_cm_dreq(ch->cm_id, NULL, 0);
1666                                 else if (prev_state ==
1667                                          RDMA_CHANNEL_CONNECTING) {
1668                                         ib_send_cm_rej(ch->cm_id,
1669                                                        IB_CM_REJ_NO_RESOURCES,
1670                                                        NULL, 0, NULL, 0);
1671                                         srpt_release_channel(ch, 1);
1672                                 }
1673
1674                                 spin_lock_irq(&sdev->spinlock);
1675                         }
1676                 }
1677
1678                 spin_unlock_irq(&sdev->spinlock);
1679
1680         } else
1681                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
1682
1683         if (((u64) (*(u64 *) req->target_port_id) !=
1684              cpu_to_be64(srpt_service_guid)) ||
1685             ((u64) (*(u64 *) (req->target_port_id + 8)) !=
1686              cpu_to_be64(srpt_service_guid))) {
1687                 rej->reason =
1688                     cpu_to_be32(SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
1689                 ret = -EINVAL;
1690                 TRACE_DBG("%s", "Reject invalid target_port_id");
1691                 goto reject;
1692         }
1693
1694         ch = kzalloc(sizeof *ch, GFP_KERNEL);
1695         if (!ch) {
1696                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1697                 TRACE_DBG("%s", "Reject failed allocate rdma_ch");
1698                 ret = -ENOMEM;
1699                 goto reject;
1700         }
1701
1702         spin_lock_init(&ch->spinlock);
1703         memcpy(ch->i_port_id, req->initiator_port_id, 16);
1704         memcpy(ch->t_port_id, req->target_port_id, 16);
1705         ch->sport = &sdev->port[param->port - 1];
1706         ch->cm_id = cm_id;
1707         ch->state = RDMA_CHANNEL_CONNECTING;
1708         INIT_LIST_HEAD(&ch->cmd_wait_list);
1709         INIT_LIST_HEAD(&ch->active_scmnd_list);
1710
1711         ret = srpt_create_ch_ib(ch);
1712         if (ret) {
1713                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1714                 TRACE_DBG("%s", "Reject failed to create rdma_ch");
1715                 goto free_ch;
1716         }
1717
1718         ret = srpt_ch_qp_rtr(ch, ch->qp);
1719         if (ret) {
1720                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1721                 TRACE_DBG("Reject failed qp to rtr/rts ret=%d", ret);
1722                 goto destroy_ib;
1723         }
1724
1725         snprintf(ch->sess_name, sizeof(ch->sess_name),
1726                  "0x%016llx%016llx",
1727                  (unsigned long long)be64_to_cpu(*(u64 *)ch->i_port_id),
1728                  (unsigned long long)be64_to_cpu(*(u64 *)(ch->i_port_id + 8)));
1729
1730         TRACE_DBG("registering session %s", ch->sess_name);
1731
1732         BUG_ON(!sdev->scst_tgt);
1733         ch->scst_sess = scst_register_session(sdev->scst_tgt, 0, ch->sess_name,
1734                                   NULL, NULL);
1735         if (!ch->scst_sess) {
1736                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1737                 TRACE_DBG("%s", "Failed to create scst sess");
1738                 goto destroy_ib;
1739         }
1740
1741         TRACE_DBG("Establish connection sess=%p name=%s cm_id=%p",
1742                   ch->scst_sess, ch->sess_name, ch->cm_id);
1743
1744         scst_sess_set_tgt_priv(ch->scst_sess, ch);
1745
1746         /* create srp_login_response */
1747         rsp->opcode = SRP_LOGIN_RSP;
1748         rsp->tag = req->tag;
1749         rsp->max_it_iu_len = req->req_it_iu_len;
1750         rsp->max_ti_iu_len = req->req_it_iu_len;
1751         rsp->buf_fmt =
1752             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
1753         rsp->req_lim_delta = cpu_to_be32(SRPT_RQ_SIZE);
1754         atomic_set(&ch->req_lim_delta, 0);
1755
1756         /* create cm reply */
1757         rep_param->qp_num = ch->qp->qp_num;
1758         rep_param->private_data = (void *)rsp;
1759         rep_param->private_data_len = sizeof *rsp;
1760         rep_param->rnr_retry_count = 7;
1761         rep_param->flow_control = 1;
1762         rep_param->failover_accepted = 0;
1763         rep_param->srq = 1;
1764         rep_param->responder_resources = 4;
1765         rep_param->initiator_depth = 4;
1766
1767         ret = ib_send_cm_rep(cm_id, rep_param);
1768         if (ret)
1769                 goto release_channel;
1770
1771         spin_lock_irq(&sdev->spinlock);
1772         list_add_tail(&ch->list, &sdev->rch_list);
1773         spin_unlock_irq(&sdev->spinlock);
1774
1775         goto out;
1776
1777 release_channel:
1778         scst_unregister_session(ch->scst_sess, 0, NULL);
1779         ch->scst_sess = NULL;
1780
1781 destroy_ib:
1782         ib_destroy_qp(ch->qp);
1783         ib_destroy_cq(ch->cq);
1784
1785 free_ch:
1786         kfree(ch);
1787
1788 reject:
1789         rej->opcode = SRP_LOGIN_REJ;
1790         rej->tag = req->tag;
1791         rej->buf_fmt =
1792             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
1793
1794         ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
1795                              (void *)rej, sizeof *rej);
1796
1797 out:
1798         kfree(rep_param);
1799         kfree(rsp);
1800         kfree(rej);
1801
1802         return ret;
1803 }
1804
1805 /**
1806  * Release the channel with the specified cm_id.
1807  *
1808  * Note: this function does not destroy the cm_id itself; the callers make the
1809  * IB CM core destroy it by returning a non-zero value from srpt_cm_handler().
1810  */
1811 static void srpt_find_and_release_channel(struct ib_cm_id *cm_id)
1812 {
1813         struct srpt_rdma_ch *ch;
1814
1815         ch = srpt_find_channel(cm_id, true);
1816         if (ch)
1817                 srpt_release_channel(ch, 0);
1818 }
1819
1820 static void srpt_cm_rej_recv(struct ib_cm_id *cm_id)
1821 {
1822         TRACE_DBG("%s: cm_id=%p", __func__, cm_id);
1823         srpt_find_and_release_channel(cm_id);
1824 }
1825
1826 /**
1827  * Process an IB_CM_RTU_RECEIVED or IB_CM_USER_ESTABLISHED event.
1828  *
1829  * An IB_CM_RTU_RECEIVED message indicates that the connection is established
1830  * and that the recipient may begin transmitting (RTU = ready to use).
1831  */
1832 static int srpt_cm_rtu_recv(struct ib_cm_id *cm_id)
1833 {
1834         struct srpt_rdma_ch *ch;
1835         int ret;
1836
1837         ch = srpt_find_channel(cm_id, false);
1838         if (!ch)
1839                 return -EINVAL;
1840
1841         if (srpt_test_and_set_channel_state(ch, RDMA_CHANNEL_CONNECTING,
1842                                             RDMA_CHANNEL_LIVE)) {
1843                 struct srpt_ioctx *ioctx, *ioctx_tmp;
1844
1845                 ret = srpt_ch_qp_rts(ch, ch->qp);
1846
1847                 list_for_each_entry_safe(ioctx, ioctx_tmp, &ch->cmd_wait_list,
1848                                          wait_list) {
1849                         list_del(&ioctx->wait_list);
1850                         srpt_handle_new_iu(ch, ioctx);
1851                 }
1852                 if (ret && srpt_test_and_set_channel_state(ch,
1853                                         RDMA_CHANNEL_LIVE,
1854                                         RDMA_CHANNEL_DISCONNECTING)) {
1855                         TRACE_DBG("cm_id=%p sess_name=%s state=%d",
1856                                   cm_id, ch->sess_name, ch->state);
1857                         ib_send_cm_dreq(ch->cm_id, NULL, 0);
1858                 }
1859         } else if (ch->state == RDMA_CHANNEL_DISCONNECTING) {
1860                 TRACE_DBG("cm_id=%p sess_name=%s state=%d",
1861                           cm_id, ch->sess_name, ch->state);
1862                 ib_send_cm_dreq(ch->cm_id, NULL, 0);
1863                 ret = -EAGAIN;
1864         } else
1865                 ret = 0;
1866
1867         return ret;
1868 }
1869
1870 static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
1871 {
1872         TRACE_DBG("%s: cm_id=%p", __func__, cm_id);
1873         srpt_find_and_release_channel(cm_id);
1874 }
1875
1876 static void srpt_cm_rep_error(struct ib_cm_id *cm_id)
1877 {
1878         TRACE_DBG("%s: cm_id=%p", __func__, cm_id);
1879         srpt_find_and_release_channel(cm_id);
1880 }
1881
1882 static int srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
1883 {
1884         struct srpt_rdma_ch *ch;
1885
1886         ch = srpt_find_channel(cm_id, false);
1887         if (!ch)
1888                 return -EINVAL;
1889
1890         TRACE_DBG("%s: cm_id= %p ch->state= %d",
1891                  __func__, cm_id, ch->state);
1892
1893         switch (ch->state) {
1894         case RDMA_CHANNEL_LIVE:
1895         case RDMA_CHANNEL_CONNECTING:
1896                 ib_send_cm_drep(ch->cm_id, NULL, 0);
1897                 break;
1898         case RDMA_CHANNEL_DISCONNECTING:
1899         default:
1900                 break;
1901         }
1902
1903         return 0;
1904 }
1905
1906 static void srpt_cm_drep_recv(struct ib_cm_id *cm_id)
1907 {
1908         TRACE_DBG("%s: cm_id=%p", __func__, cm_id);
1909         srpt_find_and_release_channel(cm_id);
1910 }
1911
1912 /**
1913  * IB connection manager callback function.
1914  *
1915  * A non-zero return value will make the caller destroy the CM ID.
1916  *
1917  * Note: srpt_add_one passes a struct srpt_device* as the third argument to
1918  * the ib_create_cm_id() call.
1919  */
1920 static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
1921 {
1922         int ret = 0;
1923
1924         switch (event->event) {
1925         case IB_CM_REQ_RECEIVED:
1926                 ret = srpt_cm_req_recv(cm_id, &event->param.req_rcvd,
1927                                        event->private_data);
1928                 break;
1929         case IB_CM_REJ_RECEIVED:
1930                 srpt_cm_rej_recv(cm_id);
1931                 ret = -EINVAL;
1932                 break;
1933         case IB_CM_RTU_RECEIVED:
1934         case IB_CM_USER_ESTABLISHED:
1935                 ret = srpt_cm_rtu_recv(cm_id);
1936                 break;
1937         case IB_CM_DREQ_RECEIVED:
1938                 ret = srpt_cm_dreq_recv(cm_id);
1939                 break;
1940         case IB_CM_DREP_RECEIVED:
1941                 srpt_cm_drep_recv(cm_id);
1942                 ret = -EINVAL;
1943                 break;
1944         case IB_CM_TIMEWAIT_EXIT:
1945                 srpt_cm_timewait_exit(cm_id);
1946                 ret = -EINVAL;
1947                 break;
1948         case IB_CM_REP_ERROR:
1949                 srpt_cm_rep_error(cm_id);
1950                 ret = -EINVAL;
1951                 break;
1952         default:
1953                 break;
1954         }
1955
1956         return ret;
1957 }
1958
1959 static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1960                                  struct srpt_ioctx *ioctx,
1961                                  struct scst_cmd *scmnd)
1962 {
1963         struct scatterlist *scat;
1964         scst_data_direction dir;
1965         struct rdma_iu *riu;
1966         struct srp_direct_buf *db;
1967         dma_addr_t dma_addr;
1968         struct ib_sge *sge;
1969         u64 raddr;
1970         u32 rsize;
1971         u32 tsize;
1972         u32 dma_len;
1973         int count, nrdma;
1974         int i, j, k;
1975
1976         scat = scst_cmd_get_sg(scmnd);
1977         dir = scst_cmd_get_data_direction(scmnd);
1978         count = dma_map_sg(ch->sport->sdev->device->dma_device, scat,
1979                            scst_cmd_get_sg_cnt(scmnd),
1980                            scst_to_tgt_dma_dir(dir));
1981         if (unlikely(!count))
1982                 return -EBUSY;
1983
1984         if (ioctx->rdma_ius && ioctx->n_rdma_ius)
1985                 nrdma = ioctx->n_rdma_ius;
1986         else {
1987                 nrdma = count / SRPT_DEF_SG_PER_WQE + ioctx->n_rbuf;
1988
1989                 ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu,
1990                                           scst_cmd_atomic(scmnd)
1991                                           ? GFP_ATOMIC : GFP_KERNEL);
1992                 if (!ioctx->rdma_ius) {
1993                         dma_unmap_sg(ch->sport->sdev->device->dma_device,
1994                                      scat, scst_cmd_get_sg_cnt(scmnd),
1995                                      scst_to_tgt_dma_dir(dir));
1996                         return -ENOMEM;
1997                 }
1998
1999                 ioctx->n_rdma_ius = nrdma;
2000         }
2001
2002         db = ioctx->rbufs;
2003         tsize = (dir == SCST_DATA_READ) ?
2004                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
2005         dma_len = sg_dma_len(&scat[0]);
2006         riu = ioctx->rdma_ius;
2007
2008         /*
2009          * First pass: for each remote descriptor, count how many ib_sge
2010          * entries it needs. If a descriptor needs no more than
2011          * SRPT_DEF_SG_PER_WQE SGEs it fits in a single RDMA work request
2012          * (one rdma_iu); otherwise extra rdma_iu entries are allocated so
2013          * that the remaining SGEs can be carried by additional RDMA work
2014          * requests.
2015          */
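        /*
         * Worked example (assuming SRPT_DEF_SG_PER_WQE == 16): a remote
         * descriptor that needs 40 SGEs is split over three rdma_iu entries
         * carrying 16, 16 and 8 SGEs, i.e. three RDMA work requests.
         */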
2016         for (i = 0, j = 0;
2017              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
2018                 rsize = be32_to_cpu(db->len);
2019                 raddr = be64_to_cpu(db->va);
2020                 riu->raddr = raddr;
2021                 riu->rkey = be32_to_cpu(db->key);
2022                 riu->sge_cnt = 0;
2023
2024                 /* Calculate how many SGEs this remote buffer requires. */
2025                 while (rsize > 0 && tsize > 0) {
2026
2027                         if (rsize >= dma_len) {
2028                                 tsize -= dma_len;
2029                                 rsize -= dma_len;
2030                                 raddr += dma_len;
2031
2032                                 if (tsize > 0) {
2033                                         ++j;
2034                                         if (j < count)
2035                                                 dma_len = sg_dma_len(&scat[j]);
2036                                 }
2037                         } else {
2038                                 tsize -= rsize;
2039                                 dma_len -= rsize;
2040                                 rsize = 0;
2041                         }
2042
2043                         ++riu->sge_cnt;
2044
2045                         if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) {
2046                                 riu->sge =
2047                                     kmalloc(riu->sge_cnt * sizeof *riu->sge,
2048                                             scst_cmd_atomic(scmnd)
2049                                             ? GFP_ATOMIC : GFP_KERNEL);
2050                                 if (!riu->sge)
2051                                         goto free_mem;
2052
2053                                 ++ioctx->n_rdma;
2054                                 ++riu;
2055                                 riu->sge_cnt = 0;
2056                                 riu->raddr = raddr;
2057                                 riu->rkey = be32_to_cpu(db->key);
2058                         }
2059                 }
2060
2061                 riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge,
2062                                    scst_cmd_atomic(scmnd)
2063                                    ? GFP_ATOMIC : GFP_KERNEL);
2064
2065                 if (!riu->sge)
2066                         goto free_mem;
2067
2068                 ++ioctx->n_rdma;
2069         }
2070
2071         db = ioctx->rbufs;
2072         scat = scst_cmd_get_sg(scmnd);
2073         tsize = (dir == SCST_DATA_READ) ?
2074                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
2075         riu = ioctx->rdma_ius;
2076         dma_len = sg_dma_len(&scat[0]);
2077         dma_addr = sg_dma_address(&scat[0]);
2078
2079         /* Second pass: map the SG DMA addresses to the rdma_iu->sge entries. */
2080         for (i = 0, j = 0;
2081              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
2082                 rsize = be32_to_cpu(db->len);
2083                 sge = riu->sge;
2084                 k = 0;
2085
2086                 while (rsize > 0 && tsize > 0) {
2087                         sge->addr = dma_addr;
2088                         sge->lkey = ch->sport->sdev->mr->lkey;
2089
2090                         if (rsize >= dma_len) {
2091                                 sge->length =
2092                                         (tsize < dma_len) ? tsize : dma_len;
2093                                 tsize -= dma_len;
2094                                 rsize -= dma_len;
2095
2096                                 if (tsize > 0) {
2097                                         ++j;
2098                                         if (j < count) {
2099                                                 dma_len = sg_dma_len(&scat[j]);
2100                                                 dma_addr =
2101                                                     sg_dma_address(&scat[j]);
2102                                         }
2103                                 }
2104                         } else {
2105                                 sge->length = (tsize < rsize) ? tsize : rsize;
2106                                 tsize -= rsize;
2107                                 dma_len -= rsize;
2108                                 dma_addr += rsize;
2109                                 rsize = 0;
2110                         }
2111
2112                         ++k;
2113                         if (k == riu->sge_cnt && rsize > 0) {
2114                                 ++riu;
2115                                 sge = riu->sge;
2116                                 k = 0;
2117                         } else if (rsize > 0)
2118                                 ++sge;
2119                 }
2120         }
2121
2122         return 0;
2123
2124 free_mem:
2125         while (ioctx->n_rdma)
2126                 kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge);
2127
2128         kfree(ioctx->rdma_ius);
2129
2130         dma_unmap_sg(ch->sport->sdev->device->dma_device,
2131                      scat, scst_cmd_get_sg_cnt(scmnd),
2132                      scst_to_tgt_dma_dir(dir));
2133
2134         return -ENOMEM;
2135 }
2136
2137 static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
2138                               scst_data_direction dir)
2139 {
2140         struct ib_send_wr wr;
2141         struct ib_send_wr *bad_wr;
2142         struct rdma_iu *riu;
2143         int i;
2144         int ret = 0;
2145
2146         riu = ioctx->rdma_ius;
2147         memset(&wr, 0, sizeof wr);
2148
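        /* Post one RDMA work request per rdma_iu built by srpt_map_sg_to_ib_sge(). */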
2149         for (i = 0; i < ioctx->n_rdma; ++i, ++riu) {
2150                 wr.opcode = (dir == SCST_DATA_READ) ?
2151                     IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
2152                 wr.next = NULL;
2153                 wr.wr_id = ioctx->index;
2154                 wr.wr.rdma.remote_addr = riu->raddr;
2155                 wr.wr.rdma.rkey = riu->rkey;
2156                 wr.num_sge = riu->sge_cnt;
2157                 wr.sg_list = riu->sge;
2158
2159                 /* Request a completion only for the last WR, and only when reading data from the initiator. */
2160                 if (i == (ioctx->n_rdma - 1) && dir == SCST_DATA_WRITE)
2161                         wr.send_flags = IB_SEND_SIGNALED;
2162
2163                 ret = ib_post_send(ch->qp, &wr, &bad_wr);
2164                 if (ret)
2165                         break;
2166         }
2167
2168         return ret;
2169 }
2170
2171 /*
2172  * Start data reception. Must not block.
2173  */
2174 static int srpt_xfer_data(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
2175                           struct scst_cmd *scmnd)
2176 {
2177         int ret;
2178
2179         ret = srpt_map_sg_to_ib_sge(ch, ioctx, scmnd);
2180         if (ret) {
2181                 printk(KERN_ERR PFX "%s[%d] ret=%d\n", __func__, __LINE__, ret);
2182                 ret = SCST_TGT_RES_QUEUE_FULL;
2183                 goto out;
2184         }
2185
2186         ret = srpt_perform_rdmas(ch, ioctx, scst_cmd_get_data_direction(scmnd));
2187         if (ret) {
2188                 printk(KERN_ERR PFX "%s[%d] ret=%d\n", __func__, __LINE__, ret);
2189                 if (ret == -EAGAIN || ret == -ENOMEM)
2190                         ret = SCST_TGT_RES_QUEUE_FULL;
2191                 else
2192                         ret = SCST_TGT_RES_FATAL_ERROR;
2193                 goto out;
2194         }
2195
2196         ret = SCST_TGT_RES_SUCCESS;
2197
2198 out:
2199         return ret;
2200 }
2201
2202 /*
2203  * Called by the SCST core to inform ib_srpt that data reception should start.
2204  * Must not block.
2205  */
2206 static int srpt_rdy_to_xfer(struct scst_cmd *scmnd)
2207 {
2208         struct srpt_rdma_ch *ch;
2209         struct srpt_ioctx *ioctx;
2210
2211         ioctx = scst_cmd_get_tgt_priv(scmnd);
2212         BUG_ON(!ioctx);
2213
2214         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2215         BUG_ON(!ch);
2216
2217         if (ch->state == RDMA_CHANNEL_DISCONNECTING)
2218                 return SCST_TGT_RES_FATAL_ERROR;
2219         else if (ch->state == RDMA_CHANNEL_CONNECTING)
2220                 return SCST_TGT_RES_QUEUE_FULL;
2221
2222         ioctx->state = SRPT_STATE_NEED_DATA;
2223
2224         return srpt_xfer_data(ch, ioctx, scmnd);
2225 }
2226
2227 /*
2228  * Called by the SCST core. Transmits the response buffer and status held in
2229  * 'scmnd'. Must not block.
2230  */
2231 static int srpt_xmit_response(struct scst_cmd *scmnd)
2232 {
2233         struct srpt_rdma_ch *ch;
2234         struct srpt_ioctx *ioctx;
2235         struct srp_rsp *srp_rsp;
2236         u64 tag;
2237         int ret = SCST_TGT_RES_SUCCESS;
2238         int dir;
2239         int status;
2240
2241         ioctx = scst_cmd_get_tgt_priv(scmnd);
2242         BUG_ON(!ioctx);
2243
2244         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2245         BUG_ON(!ch);
2246
2247         tag = scst_cmd_get_tag(scmnd);
2248
2249         if (ch->state != RDMA_CHANNEL_LIVE) {
2250                 printk(KERN_ERR PFX
2251                        "%s: tag= %lld channel in bad state %d\n",
2252                        __func__, (unsigned long long)tag, ch->state);
2253
2254                 if (ch->state == RDMA_CHANNEL_DISCONNECTING)
2255                         ret = SCST_TGT_RES_FATAL_ERROR;
2256                 else if (ch->state == RDMA_CHANNEL_CONNECTING)
2257                         ret = SCST_TGT_RES_QUEUE_FULL;
2258
2259                 if (unlikely(scst_cmd_aborted(scmnd)))
2260                         goto out_aborted;
2261
2262                 goto out;
2263         }
2264
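        /* Give the CPU ownership of the response buffer before filling it in. */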
2265         dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
2266                                 MAX_MESSAGE_SIZE, DMA_TO_DEVICE);
2267
2268         srp_rsp = ioctx->buf;
2269
2270         if (unlikely(scst_cmd_aborted(scmnd))) {
2271                 printk(KERN_ERR PFX
2272                        "%s: tag= %lld already aborted\n",
2273                        __func__, (unsigned long long)tag);
2274                 goto out_aborted;
2275         }
2276
2277         dir = scst_cmd_get_data_direction(scmnd);
2278         status = scst_cmd_get_status(scmnd) & 0xff;
2279
2280         srpt_build_cmd_rsp(ch, ioctx, NO_SENSE, NO_ADD_SENSE, tag);
2281
2282         if (SCST_SENSE_VALID(scst_cmd_get_sense_buffer(scmnd))) {
2283                 srp_rsp->sense_data_len = scst_cmd_get_sense_buffer_len(scmnd);
2284                 if (srp_rsp->sense_data_len >
2285                     (MAX_MESSAGE_SIZE - sizeof *srp_rsp))
2286                         srp_rsp->sense_data_len =
2287                             MAX_MESSAGE_SIZE - sizeof *srp_rsp;
2288
2289                 memcpy((u8 *) (srp_rsp + 1), scst_cmd_get_sense_buffer(scmnd),
2290                        srp_rsp->sense_data_len);
2291
2292                 srp_rsp->sense_data_len = cpu_to_be32(srp_rsp->sense_data_len);
2293                 srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
2294
2295                 if (!status)
2296                         status = SAM_STAT_CHECK_CONDITION;
2297         }
2298
2299         srp_rsp->status = status;
2300
2301         /* transfer read data if any */
2302         if (dir == SCST_DATA_READ && scst_cmd_get_resp_data_len(scmnd)) {
2303                 ret = srpt_xfer_data(ch, ioctx, scmnd);
2304                 if (ret != SCST_TGT_RES_SUCCESS) {
2305                         printk(KERN_ERR PFX
2306                                "%s: tag= %lld xfer_data failed\n",
2307                                __func__, (unsigned long long)tag);
2308                         goto out;
2309                 }
2310         }
2311
2312         ioctx->state = SRPT_STATE_PROCESSED;
2313
2314         if (srpt_post_send(ch, ioctx,
2315                            sizeof *srp_rsp +
2316                            be32_to_cpu(srp_rsp->sense_data_len))) {
2317                 printk(KERN_ERR PFX "%s: ch->state= %d tag= %lld\n",
2318                        __func__, ch->state,
2319                        (unsigned long long)tag);
2320                 ret = SCST_TGT_RES_FATAL_ERROR;
2321         }
2322
2323 out:
2324         return ret;
2325
2326 out_aborted:
2327         ret = SCST_TGT_RES_SUCCESS;
2328         scst_set_delivery_status(scmnd, SCST_CMD_DELIVERY_ABORTED);
2329         ioctx->state = SRPT_STATE_ABORTED;
2330         scst_tgt_cmd_done(scmnd, SCST_CONTEXT_SAME);
2331         goto out;
2332 }
2333
2334 /*
2335  * Called by the SCST core to inform ib_srpt that a received task management
2336  * function has been completed. Must not block.
2337  */
2338 static void srpt_tsk_mgmt_done(struct scst_mgmt_cmd *mcmnd)
2339 {
2340         struct srpt_rdma_ch *ch;
2341         struct srpt_mgmt_ioctx *mgmt_ioctx;
2342         struct srpt_ioctx *ioctx;
2343         int rsp_len;
2344
2345         mgmt_ioctx = scst_mgmt_cmd_get_tgt_priv(mcmnd);
2346         BUG_ON(!mgmt_ioctx);
2347
2348         ch = mgmt_ioctx->ch;
2349         BUG_ON(!ch);
2350
2351         ioctx = mgmt_ioctx->ioctx;
2352         BUG_ON(!ioctx);
2353
2354         TRACE_DBG("%s: tsk_mgmt_done for tag= %lld status=%d\n",
2355                   __func__, (unsigned long long)mgmt_ioctx->tag,
2356                   scst_mgmt_cmd_get_status(mcmnd));
2357
2358         rsp_len = srpt_build_tskmgmt_rsp(ch, ioctx,
2359                                          (scst_mgmt_cmd_get_status(mcmnd) ==
2360                                           SCST_MGMT_STATUS_SUCCESS) ?
2361                                          SRP_TSK_MGMT_SUCCESS :
2362                                          SRP_TSK_MGMT_FAILED,
2363                                          mgmt_ioctx->tag);
2364         srpt_post_send(ch, ioctx, rsp_len);
2365
2366         scst_mgmt_cmd_set_tgt_priv(mcmnd, NULL);
2367
2368         kfree(mgmt_ioctx);
2369 }
2370
2371 /*
2372  * Called by the SCST core to inform ib_srpt that the command 'scmnd' is about
2373  * to be freed. May be called in IRQ context.
2374  */
2375 static void srpt_on_free_cmd(struct scst_cmd *scmnd)
2376 {
2377         struct srpt_rdma_ch *ch;
2378         struct srpt_ioctx *ioctx;
2379
2380         ioctx = scst_cmd_get_tgt_priv(scmnd);
2381         BUG_ON(!ioctx);
2382
2383         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2384         BUG_ON(!ch);
2385
2386         spin_lock_irq(&ch->spinlock);
2387         list_del(&ioctx->scmnd_list);
2388         ch->active_scmnd_cnt--;
2389         spin_unlock_irq(&ch->spinlock);
2390
2391         srpt_reset_ioctx(ch, ioctx);
2392         scst_cmd_set_tgt_priv(scmnd, NULL);
2393 }
2394
2395 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2396 /* A vanilla 2.6.19 or older kernel without backported OFED kernel headers. */
2397 static void srpt_refresh_port_work(void *ctx)
2398 #else
2399 static void srpt_refresh_port_work(struct work_struct *work)
2400 #endif
2401 {
2402 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2403         struct srpt_port *sport = (struct srpt_port *)ctx;
2404 #else
2405         struct srpt_port *sport = container_of(work, struct srpt_port, work);
2406 #endif
2407
2408         srpt_refresh_port(sport);
2409 }
2410
2411 /*
2412  * Called by the SCST core to detect target adapters. Returns the number of
2413  * detected target adapters.
2414  */
2415 static int srpt_detect(struct scst_tgt_template *tp)
2416 {
2417         int device_count;
2418
2419         TRACE_ENTRY();
2420
2421         device_count = atomic_read(&srpt_device_count);
2422
2423         TRACE_EXIT_RES(device_count);
2424
2425         return device_count;
2426 }
2427
2428 /*
2429  * Callback function called by the SCST core from scst_unregister() to free up
2430  * the resources associated with device scst_tgt.
2431  */
2432 static int srpt_release(struct scst_tgt *scst_tgt)
2433 {
2434         struct srpt_device *sdev = scst_tgt_get_tgt_priv(scst_tgt);
2435         struct srpt_rdma_ch *ch, *tmp_ch;
2436
2437         TRACE_ENTRY();
2438
2439         BUG_ON(!scst_tgt);
2440 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
2441         WARN_ON(!sdev);
2442         if (!sdev)
2443                 return -ENODEV;
2444 #else
2445         if (WARN_ON(!sdev))
2446                 return -ENODEV;
2447 #endif
2448
2449         srpt_unregister_procfs_entry(scst_tgt->tgtt);
2450
2451         spin_lock_irq(&sdev->spinlock);
2452         list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
2453                 list_del(&ch->list);
2454                 spin_unlock_irq(&sdev->spinlock);
2455                 srpt_release_channel(ch, 1);
2456                 spin_lock_irq(&sdev->spinlock);
2457         }
2458         spin_unlock_irq(&sdev->spinlock);
2459
2460         srpt_unregister_mad_agent(sdev);
2461
2462         scst_tgt_set_tgt_priv(scst_tgt, NULL);
2463
2464         TRACE_EXIT();
2465
2466         return 0;
2467 }
2468
2469 /*
2470  * Entry point for ib_srpt's kernel thread. This kernel thread is only created
2471  * when the module parameter 'thread' is not zero (the default is zero).
2472  * This thread processes the ioctx list srpt_thread.thread_ioctx_list.
2473  *
2474  * @pre thread != 0
2475  */
2476 static int srpt_ioctx_thread(void *arg)
2477 {
2478         struct srpt_ioctx *ioctx;
2479
2480         /* Hibernation / freezing of the SRPT kernel thread is not supported. */
2481         current->flags |= PF_NOFREEZE;
2482
2483         spin_lock_irq(&srpt_thread.thread_lock);
2484         while (!kthread_should_stop()) {
2485                 wait_queue_t wait;
2486                 init_waitqueue_entry(&wait, current);
2487
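                /*
                 * Open-coded wait: sleep until srpt_test_ioctx_list() reports
                 * pending work, dropping thread_lock while sleeping.
                 */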
2488                 if (!srpt_test_ioctx_list()) {
2489                         add_wait_queue_exclusive(&ioctx_list_waitQ, &wait);
2490
2491                         for (;;) {
2492                                 set_current_state(TASK_INTERRUPTIBLE);
2493                                 if (srpt_test_ioctx_list())
2494                                         break;
2495                                 spin_unlock_irq(&srpt_thread.thread_lock);
2496                                 schedule();
2497                                 spin_lock_irq(&srpt_thread.thread_lock);
2498                         }
2499                         set_current_state(TASK_RUNNING);
2500                         remove_wait_queue(&ioctx_list_waitQ, &wait);
2501                 }
2502
2503                 while (!list_empty(&srpt_thread.thread_ioctx_list)) {
2504                         ioctx = list_entry(srpt_thread.thread_ioctx_list.next,
2505                                            struct srpt_ioctx, comp_list);
2506
2507                         list_del(&ioctx->comp_list);
2508
2509                         spin_unlock_irq(&srpt_thread.thread_lock);
2510                         switch (ioctx->op) {
2511                         case IB_WC_SEND:
2512                                 srpt_handle_send_comp(ioctx->ch, ioctx,
2513                                         SCST_CONTEXT_DIRECT);
2514                                 break;
2515                         case IB_WC_RDMA_WRITE:
2516                         case IB_WC_RDMA_READ:
2517                                 srpt_handle_rdma_comp(ioctx->ch, ioctx);
2518                                 break;
2519                         case IB_WC_RECV:
2520                                 srpt_handle_new_iu(ioctx->ch, ioctx);
2521                                 break;
2522                         default:
2523                                 break;
2524                         }
2525                         spin_lock_irq(&srpt_thread.thread_lock);
2526                 }
2527         }
2528         spin_unlock_irq(&srpt_thread.thread_lock);
2529
2530         return 0;
2531 }
2532
2533 /* SCST target template for the SRP target implementation. */
2534 static struct scst_tgt_template srpt_template = {
2535         .name = DRV_NAME,
2536         .sg_tablesize = SRPT_DEF_SG_TABLESIZE,
2537         .xmit_response_atomic = 1,
2538         .rdy_to_xfer_atomic = 1,
2539         .no_proc_entry = 0,
2540         .detect = srpt_detect,
2541         .release = srpt_release,
2542         .xmit_response = srpt_xmit_response,
2543         .rdy_to_xfer = srpt_rdy_to_xfer,
2544         .on_free_cmd = srpt_on_free_cmd,
2545         .task_mgmt_fn_done = srpt_tsk_mgmt_done
2546 };
2547
2548 /*
2549  * The callback function srpt_release_class_dev() is called whenever a
2550  * device is removed from the /sys/class/infiniband_srpt device class.
2551  */
2552 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2553 static void srpt_release_class_dev(struct class_device *class_dev)
2554 #else
2555 static void srpt_release_class_dev(struct device *dev)
2556 #endif
2557 {
2558 }
2559
2560 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
2561 static int srpt_trace_level_show(struct seq_file *seq, void *v)
2562 {
2563         return scst_proc_log_entry_read(seq, trace_flag, NULL);
2564 }
2565
2566 static ssize_t srpt_proc_trace_level_write(struct file *file,
2567         const char __user *buf, size_t length, loff_t *off)
2568 {
2569         return scst_proc_log_entry_write(file, buf, length, &trace_flag,
2570                 DEFAULT_SRPT_TRACE_FLAGS, NULL);
2571 }
2572
2573 static struct scst_proc_data srpt_log_proc_data = {
2574         SCST_DEF_RW_SEQ_OP(srpt_proc_trace_level_write)
2575         .show = srpt_trace_level_show,
2576 };
2577 #endif
2578
2579 static struct class_attribute srpt_class_attrs[] = {
2580         __ATTR_NULL,
2581 };
2582
2583 static struct class srpt_class = {
2584         .name = "infiniband_srpt",
2585 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2586         .release = srpt_release_class_dev,
2587 #else
2588         .dev_release = srpt_release_class_dev,
2589 #endif
2590         .class_attrs = srpt_class_attrs,
2591 };
2592
2593 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2594 static ssize_t show_login_info(struct class_device *class_dev, char *buf)
2595 #else
2596 static ssize_t show_login_info(struct device *dev,
2597                                struct device_attribute *attr, char *buf)
2598 #endif
2599 {
2600         struct srpt_device *sdev =
2601 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2602                 container_of(class_dev, struct srpt_device, class_dev);
2603 #else
2604                 container_of(dev, struct srpt_device, dev);
2605 #endif
2606         struct srpt_port *sport;
2607         int i;
2608         int len = 0;
2609
2610         for (i = 0; i < sdev->device->phys_port_cnt; i++) {
2611                 sport = &sdev->port[i];
2612
2613                 len += sprintf(buf + len,
2614                                "tid_ext=%016llx,ioc_guid=%016llx,pkey=ffff,"
2615                                "dgid=%04x%04x%04x%04x%04x%04x%04x%04x,"
2616                                "service_id=%016llx\n",
2617                                (unsigned long long) srpt_service_guid,
2618                                (unsigned long long) srpt_service_guid,
2619                                be16_to_cpu(((__be16 *) sport->gid.raw)[0]),
2620                                be16_to_cpu(((__be16 *) sport->gid.raw)[1]),
2621                                be16_to_cpu(((__be16 *) sport->gid.raw)[2]),
2622                                be16_to_cpu(((__be16 *) sport->gid.raw)[3]),
2623                                be16_to_cpu(((__be16 *) sport->gid.raw)[4]),
2624                                be16_to_cpu(((__be16 *) sport->gid.raw)[5]),
2625                                be16_to_cpu(((__be16 *) sport->gid.raw)[6]),
2626                                be16_to_cpu(((__be16 *) sport->gid.raw)[7]),
2627                                (unsigned long long) srpt_service_guid);
2628         }
2629
2630         return len;
2631 }
2632
2633 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2634 static CLASS_DEVICE_ATTR(login_info, S_IRUGO, show_login_info, NULL);
2635 #else
2636 static DEVICE_ATTR(login_info, S_IRUGO, show_login_info, NULL);
2637 #endif
2638
2639 /*
2640  * Callback function called by the InfiniBand core when either an InfiniBand
2641  * device has been added or during the ib_register_client() call for each
2642  * registered InfiniBand device.
2643  */
2644 static void srpt_add_one(struct ib_device *device)
2645 {
2646         struct srpt_device *sdev;
2647         struct srpt_port *sport;
2648         struct ib_srq_init_attr srq_attr;
2649         int i;
2650
2651         TRACE_ENTRY();
2652
2653         sdev = kzalloc(sizeof *sdev, GFP_KERNEL);
2654         if (!sdev)
2655                 return;
2656
2657         sdev->device = device;
2658
2659 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2660         sdev->class_dev.class = &srpt_class;
2661         sdev->class_dev.dev = device->dma_device;
2662         snprintf(sdev->class_dev.class_id, BUS_ID_SIZE,
2663                  "srpt-%s", device->name);
2664 #else
2665         sdev->dev.class = &srpt_class;
2666         sdev->dev.parent = device->dma_device;
2667 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30)
2668         snprintf(sdev->dev.bus_id, BUS_ID_SIZE, "srpt-%s", device->name);
2669 #else
2670         snprintf(sdev->init_name, sizeof(sdev->init_name),
2671                  "srpt-%s", device->name);
2672         sdev->dev.init_name = sdev->init_name;
2673 #endif
2674 #endif
2675
2676 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2677         if (class_device_register(&sdev->class_dev))
2678                 goto free_dev;
2679         if (class_device_create_file(&sdev->class_dev,
2680                                      &class_device_attr_login_info))
2681                 goto err_dev;
2682 #else
2683         if (device_register(&sdev->dev))
2684                 goto free_dev;
2685         if (device_create_file(&sdev->dev, &dev_attr_login_info))
2686                 goto err_dev;
2687 #endif
2688
2689         if (ib_query_device(device, &sdev->dev_attr))
2690                 goto err_dev;
2691
2692         sdev->pd = ib_alloc_pd(device);
2693         if (IS_ERR(sdev->pd))
2694                 goto err_dev;
2695
2696         sdev->mr = ib_get_dma_mr(sdev->pd, IB_ACCESS_LOCAL_WRITE);
2697         if (IS_ERR(sdev->mr))
2698                 goto err_pd;
2699
2700         srq_attr.event_handler = srpt_srq_event;
2701         srq_attr.srq_context = (void *)sdev;
2702         srq_attr.attr.max_wr = min(SRPT_SRQ_SIZE, sdev->dev_attr.max_srq_wr);
2703         srq_attr.attr.max_sge = 1;
2704         srq_attr.attr.srq_limit = 0;
2705
2706         sdev->srq = ib_create_srq(sdev->pd, &srq_attr);
2707         if (IS_ERR(sdev->srq))
2708                 goto err_mr;
2709
2710         TRACE_DBG("%s: create SRQ #wr= %d max_allow=%d dev= %s",
2711                __func__, srq_attr.attr.max_wr,
2712               sdev->dev_attr.max_srq_wr, device->name);
2713
2714         if (!srpt_service_guid)
2715                 srpt_service_guid = be64_to_cpu(device->node_guid);
2716
2717         sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev);
2718         if (IS_ERR(sdev->cm_id))
2719                 goto err_srq;
2720
2721         /* print out target login information */
2722         TRACE_DBG("Target login info: id_ext=%016llx,"
2723                   "ioc_guid=%016llx,pkey=ffff,service_id=%016llx",
2724                   (unsigned long long) srpt_service_guid,
2725                   (unsigned long long) srpt_service_guid,
2726                   (unsigned long long) srpt_service_guid);
2727
2728         /*
2729          * We do not have a consistent service_id (i.e. also the id_ext of the
2730          * target_id) to identify this target. We currently use the GUID of the
2731          * first HCA in the system as the service_id; the target_id will
2732          * therefore change if this HCA fails and is replaced by a different HCA.
2733          */
2734         if (ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid), 0, NULL))
2735                 goto err_cm;
2736
2737         INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device,
2738                               srpt_event_handler);
2739         if (ib_register_event_handler(&sdev->event_handler))
2740                 goto err_cm;
2741
2742         if (srpt_alloc_ioctx_ring(sdev))
2743                 goto err_event;
2744
2745         INIT_LIST_HEAD(&sdev->rch_list);
2746         spin_lock_init(&sdev->spinlock);
2747
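        /* Pre-post one receive work request on the SRQ for every ioctx. */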
2748         for (i = 0; i < SRPT_SRQ_SIZE; ++i)
2749                 srpt_post_recv(sdev, sdev->ioctx_ring[i]);
2750
2751         ib_set_client_data(device, &srpt_client, sdev);
2752
2753         sdev->scst_tgt = scst_register(&srpt_template, NULL);
2754         if (!sdev->scst_tgt) {
2755                 printk(KERN_ERR PFX "SCST registration failed for %s.\n",
2756                         sdev->device->name);
2757                 goto err_ring;
2758         }
2759
2760         scst_tgt_set_tgt_priv(sdev->scst_tgt, sdev);
2761
2762         for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
2763                 sport = &sdev->port[i - 1];
2764                 sport->sdev = sdev;
2765                 sport->port = i;
2766 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2767                 /*
2768                  * A vanilla 2.6.19 or older kernel without backported OFED
2769                  * kernel headers.
2770                  */
2771                 INIT_WORK(&sport->work, srpt_refresh_port_work, sport);
2772 #else
2773                 INIT_WORK(&sport->work, srpt_refresh_port_work);
2774 #endif
2775                 if (srpt_refresh_port(sport)) {
2776                         printk(KERN_ERR PFX "MAD registration failed"
2777                                " for %s-%d.\n", sdev->device->name, i);
2778                         goto err_refresh_port;
2779                 }
2780         }
2781
2782         atomic_inc(&srpt_device_count);
2783
2784         TRACE_EXIT();
2785
2786         return;
2787
2788 err_refresh_port:
2789         scst_unregister(sdev->scst_tgt);
2790 err_ring:
2791         ib_set_client_data(device, &srpt_client, NULL);
2792         srpt_free_ioctx_ring(sdev);
2793 err_event:
2794         ib_unregister_event_handler(&sdev->event_handler);
2795 err_cm:
2796         ib_destroy_cm_id(sdev->cm_id);
2797 err_srq:
2798         ib_destroy_srq(sdev->srq);
2799 err_mr:
2800         ib_dereg_mr(sdev->mr);
2801 err_pd:
2802         ib_dealloc_pd(sdev->pd);
2803 err_dev:
2804 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2805         class_device_unregister(&sdev->class_dev);
2806 #else
2807         device_unregister(&sdev->dev);
2808 #endif
2809 free_dev:
2810         kfree(sdev);
2811
2812         TRACE_EXIT();
2813 }
2814
2815 /*
2816  * Callback function called by the InfiniBand core when either an InfiniBand
2817  * device has been removed or during the ib_unregister_client() call for each
2818  * registered InfiniBand device.
2819  */
2820 static void srpt_remove_one(struct ib_device *device)
2821 {
2822         int i;
2823         struct srpt_device *sdev;
2824
2825         TRACE_ENTRY();
2826
2827         sdev = ib_get_client_data(device, &srpt_client);
2828 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
2829         WARN_ON(!sdev);
2830         if (!sdev)
2831                 return;
2832 #else
2833         if (WARN_ON(!sdev))
2834                 return;
2835 #endif
2836
2837         /*
2838          * Cancel the work if it is queued. Wait until srpt_refresh_port_work()
2839          * has finished if it is running.
2840          */
2841         for (i = 0; i < sdev->device->phys_port_cnt; i++)
2842 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 22)
2843                 cancel_work_sync(&sdev->port[i].work);
2844 #else
2845                 /*
2846                  * cancel_work_sync() was introduced in kernel 2.6.22. Older
2847                  * kernels do not have a facility to cancel scheduled work.
2848                  */
2849                 printk(KERN_ERR PFX
2850                        "your kernel does not provide cancel_work_sync().\n");
2851 #endif
2852
2853         scst_unregister(sdev->scst_tgt);
2854         sdev->scst_tgt = NULL;
2855
2856         ib_unregister_event_handler(&sdev->event_handler);
2857         ib_destroy_cm_id(sdev->cm_id);
2858         ib_destroy_srq(sdev->srq);
2859         ib_dereg_mr(sdev->mr);
2860         ib_dealloc_pd(sdev->pd);
2861 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2862         class_device_unregister(&sdev->class_dev);
2863 #else
2864         device_unregister(&sdev->dev);
2865 #endif
2866
2867         srpt_free_ioctx_ring(sdev);
2868         kfree(sdev);
2869
2870         TRACE_EXIT();
2871 }
2872
2873 /**
2874  * Create procfs entries for srpt. Currently the only procfs entry created
2875  * by this function is the "trace_level" entry.
2876  */
2877 static int srpt_register_procfs_entry(struct scst_tgt_template *tgt)
2878 {
2879         int res = 0;
2880 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
2881         struct proc_dir_entry *p, *root;
2882
2883         root = scst_proc_get_tgt_root(tgt);
2884         WARN_ON(!root);
2885         if (root) {
2886                 /*
2887                  * Fill in the scst_proc_data::data pointer, which is used in
2888                  * a printk(KERN_INFO ...) statement in
2889                  * scst_proc_log_entry_write() in scst_proc.c.
2890                  */
2891                 srpt_log_proc_data.data = (char *)tgt->name;
2892                 p = scst_create_proc_entry(root, SRPT_PROC_TRACE_LEVEL_NAME,
2893                                            &srpt_log_proc_data);
2894                 if (!p)
2895                         res = -ENOMEM;
2896         } else
2897                 res = -ENOMEM;
2898
2899 #endif
2900         return res;
2901 }
2902
2903 static void srpt_unregister_procfs_entry(struct scst_tgt_template *tgt)
2904 {
2905 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
2906         struct proc_dir_entry *root;
2907
2908         root = scst_proc_get_tgt_root(tgt);
2909         WARN_ON(!root);
2910         if (root)
2911                 remove_proc_entry(SRPT_PROC_TRACE_LEVEL_NAME, root);
2912 #endif
2913 }
2914
2915 /*
2916  * Module initialization.
2917  *
2918  * Note: since ib_register_client() registers callback functions, and since at
2919  * least one of these callback functions (srpt_add_one()) calls SCST functions,
2920  * the SCST target template must be registered before ib_register_client() is
2921  * called.
2922  */
2923 static int __init srpt_init_module(void)
2924 {
2925         int ret;
2926
2927         ret = class_register(&srpt_class);
2928         if (ret) {
2929                 printk(KERN_ERR PFX "couldn't register class ib_srpt\n");
2930                 goto out;
2931         }
2932
2933         ret = scst_register_target_template(&srpt_template);
2934         if (ret < 0) {
2935                 printk(KERN_ERR PFX "couldn't register with scst\n");
2936                 ret = -ENODEV;
2937                 goto out_unregister_class;
2938         }
2939
2940         ret = srpt_register_procfs_entry(&srpt_template);
2941         if (ret) {
2942                 printk(KERN_ERR PFX "couldn't register procfs entry\n");
2943                 goto out_unregister_target;
2944         }
2945
2946         ret = ib_register_client(&srpt_client);
2947         if (ret) {
2948                 printk(KERN_ERR PFX "couldn't register IB client\n");
2949                 goto out_unregister_target;
2950         }
2951
2952         if (thread) {
2953                 spin_lock_init(&srpt_thread.thread_lock);
2954                 INIT_LIST_HEAD(&srpt_thread.thread_ioctx_list);
2955                 srpt_thread.thread = kthread_run(srpt_ioctx_thread,
2956                                                  NULL, "srpt_thread");
2957                 if (IS_ERR(srpt_thread.thread)) {
2958                         srpt_thread.thread = NULL;
2959                         thread = 0;
2960                 }
2961         }
2962
2963         return 0;
2964
2965 out_unregister_target:
2966         /*
2967          * Note: the procfs entry is unregistered in srpt_release(), which is
2968          * called by scst_unregister_target_template().
2969          */
2970         scst_unregister_target_template(&srpt_template);
2971 out_unregister_class:
2972         class_unregister(&srpt_class);
2973 out:
2974         return ret;
2975 }
2976
2977 static void __exit srpt_cleanup_module(void)
2978 {
2979         TRACE_ENTRY();
2980
2981         if (srpt_thread.thread)
2982                 kthread_stop(srpt_thread.thread);
2983         ib_unregister_client(&srpt_client);
2984         scst_unregister_target_template(&srpt_template);
2985         class_unregister(&srpt_class);
2986
2987         TRACE_EXIT();
2988 }
2989
2990 module_init(srpt_init_module);
2991 module_exit(srpt_cleanup_module);