/*
 * Copyright (c) 2006 - 2009 Mellanox Technology Inc.  All rights reserved.
 * Copyright (C) 2008 Vladislav Bolkhovitin <vst@vlnb.net>
 * Copyright (C) 2008 - 2009 Bart Van Assche <bart.vanassche@gmail.com>
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/kthread.h>
#include <asm/atomic.h>
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#endif
#include "ib_srpt.h"
#include "scst_debug.h"

/* Name of this kernel module. */
#define DRV_NAME                "ib_srpt"
/* Prefix for printk() kernel messages. */
#define PFX                     DRV_NAME ": "
#define DRV_VERSION             "1.0.1"
#define DRV_RELDATE             "July 10, 2008"
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
/* Flags to be used in SCST debug tracing statements. */
#define DEFAULT_SRPT_TRACE_FLAGS (TRACE_OUT_OF_MEM | TRACE_MINOR \
                                  | TRACE_MGMT | TRACE_SPECIAL)
/* Name of the entry that will be created under /proc/scsi_tgt/ib_srpt. */
#define SRPT_PROC_TRACE_LEVEL_NAME      "trace_level"
#endif

#define MELLANOX_SRPT_ID_STRING "Mellanox OFED SRP target"

MODULE_AUTHOR("Vu Pham");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol target "
                   "v" DRV_VERSION " (" DRV_RELDATE ")");
MODULE_LICENSE("Dual BSD/GPL");

struct srpt_thread {
        /* Protects thread_ioctx_list. */
        spinlock_t thread_lock;
        /* I/O contexts to be processed by the kernel thread. */
        struct list_head thread_ioctx_list;
        /* SRPT kernel thread. */
        struct task_struct *thread;
};

/*
 * Global Variables
 */

static u64 mellanox_ioc_guid;
/* Number of srpt_device structures. */
static atomic_t srpt_device_count;
static int thread;
static struct srpt_thread srpt_thread;
static DECLARE_WAIT_QUEUE_HEAD(ioctx_list_waitQ);
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
static unsigned long trace_flag = DEFAULT_SRPT_TRACE_FLAGS;
module_param(trace_flag, long, 0644);
MODULE_PARM_DESC(trace_flag,
                 "Trace flags for the ib_srpt kernel module.");
#endif

module_param(thread, int, 0444);
MODULE_PARM_DESC(thread,
                 "Process SRP I/O contexts in kernel thread context instead "
                 "of soft IRQ context (default: 0, i.e. use soft IRQ where "
                 "possible).");

static void srpt_add_one(struct ib_device *device);
static void srpt_remove_one(struct ib_device *device);
static void srpt_unregister_mad_agent(struct srpt_device *sdev);
static void srpt_unregister_procfs_entry(struct scst_tgt_template *tgt);

static struct ib_client srpt_client = {
        .name = DRV_NAME,
        .add = srpt_add_one,
        .remove = srpt_remove_one
};

/**
 * Atomically test and set the channel state.
 * @ch: RDMA channel.
 * @old: channel state to compare with.
 * @new: state to change the channel state to if the current state matches the
 *       argument 'old'.
 *
 * Returns true if the channel state matched old upon entry of this function,
 * and false otherwise.
 */
static bool srpt_test_and_set_channel_state(struct srpt_rdma_ch *ch,
                                            enum rdma_ch_state old,
                                            enum rdma_ch_state new)
{
        unsigned long flags;
        enum rdma_ch_state cur;

        spin_lock_irqsave(&ch->spinlock, flags);
        cur = ch->state;
        if (cur == old)
                ch->state = new;
        spin_unlock_irqrestore(&ch->spinlock, flags);

        return cur == old;
}
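
/*
 * Usage note: srpt_qp_event() below relies on the compare-and-swap semantics
 * of the function above. When several contexts race to move a channel from
 * RDMA_CHANNEL_LIVE to RDMA_CHANNEL_DISCONNECTING, exactly one caller sees
 * 'true', so the disconnect request (DREQ) is sent only once.
 */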

/*
 * Callback function called by the InfiniBand core when an asynchronous IB
 * event occurs. This callback may occur in interrupt context. See also
 * section 11.5.2, Set Asynchronous Event Handler in the InfiniBand
 * Architecture Specification.
 */
static void srpt_event_handler(struct ib_event_handler *handler,
                               struct ib_event *event)
{
        struct srpt_device *sdev;
        struct srpt_port *sport;

        sdev = ib_get_client_data(event->device, &srpt_client);
        if (!sdev || sdev->device != event->device)
                return;

        TRACE_DBG("ASYNC event= %d on device= %s",
                  event->event, sdev->device->name);

        switch (event->event) {
        case IB_EVENT_PORT_ERR:
                if (event->element.port_num <= sdev->device->phys_port_cnt) {
                        sport = &sdev->port[event->element.port_num - 1];
                        sport->lid = 0;
                        sport->sm_lid = 0;
                }
                break;
        case IB_EVENT_PORT_ACTIVE:
        case IB_EVENT_LID_CHANGE:
        case IB_EVENT_PKEY_CHANGE:
        case IB_EVENT_SM_CHANGE:
        case IB_EVENT_CLIENT_REREGISTER:
                /*
                 * Refresh port data asynchronously. Note: it is safe to call
                 * schedule_work() even if &sport->work is already on the
                 * global workqueue because schedule_work() tests for the
                 * work_pending() condition before adding &sport->work to the
                 * global work queue.
                 */
                if (event->element.port_num <= sdev->device->phys_port_cnt) {
                        sport = &sdev->port[event->element.port_num - 1];
                        if (!sport->lid && !sport->sm_lid)
                                schedule_work(&sport->work);
                }
                break;
        default:
                break;
        }
}

/*
 * Callback function called by the InfiniBand core for SRQ (shared receive
 * queue) events.
 */
static void srpt_srq_event(struct ib_event *event, void *ctx)
{
        TRACE_DBG("SRQ event %d", event->event);
}

/*
 * Callback function called by the InfiniBand core for QP (queue pair) events.
 */
static void srpt_qp_event(struct ib_event *event, void *ctx)
{
        struct srpt_rdma_ch *ch = ctx;

        TRACE_DBG("QP event %d on cm_id=%p sess_name=%s state=%d",
                  event->event, ch->cm_id, ch->sess_name, ch->state);

        switch (event->event) {
        case IB_EVENT_COMM_EST:
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20) || defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
                ib_cm_notify(ch->cm_id, event->event);
#else
                /* Vanilla 2.6.19 kernel (or before) without OFED. */
                printk(KERN_ERR PFX "ib_cm_notify() is not available on"
                       " vanilla kernels older than 2.6.20\n");
#endif
                break;
        case IB_EVENT_QP_LAST_WQE_REACHED:
                if (srpt_test_and_set_channel_state(ch, RDMA_CHANNEL_LIVE,
                                        RDMA_CHANNEL_DISCONNECTING)) {
                        TRACE_DBG("%s", "Disconnecting channel.");
                        ib_send_cm_dreq(ch->cm_id, NULL, 0);
                }
                break;
        default:
                break;
        }
}
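
/*
 * Background note: for a queue pair that is associated with an SRQ, the
 * IB_EVENT_QP_LAST_WQE_REACHED event is generated once the QP has entered
 * the error state and its last work request has been consumed, which is why
 * srpt_qp_event() above uses it as the trigger for disconnecting the channel.
 */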

/*
 * Helper function for filling in an InfiniBand IOUnitInfo structure. Copies
 * the lowest four bits of 'value' into element 'slot' of the array of
 * four-bit elements 'c_list' (controller list). The index 'slot' is
 * one-based.
 *
 * @pre 1 <= slot && 0 <= value && value < 16
 */
static void srpt_set_ioc(u8 *c_list, u32 slot, u8 value)
{
        u16 id;
        u8 tmp;

        id = (slot - 1) / 2;
        if (slot & 0x1) {
                tmp = c_list[id] & 0xf;
                c_list[id] = (value << 4) | tmp;
        } else {
                tmp = c_list[id] & 0xf0;
                c_list[id] = (value & 0xf) | tmp;
        }
}
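
/*
 * Worked example: every byte of c_list holds two one-based slots, the
 * odd-numbered slot in the high nibble and the even-numbered slot in the low
 * nibble. Hence srpt_set_ioc(c_list, 1, v) stores v in bits 7:4 of c_list[0],
 * srpt_set_ioc(c_list, 2, v) stores v in bits 3:0 of c_list[0] and
 * srpt_set_ioc(c_list, 3, v) stores v in bits 7:4 of c_list[1].
 */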

/*
 * Write InfiniBand ClassPortInfo to mad. See also section 16.3.3.1
 * ClassPortInfo in the InfiniBand Architecture Specification.
 */
static void srpt_get_class_port_info(struct ib_dm_mad *mad)
{
        struct ib_class_port_info *cif;

        cif = (struct ib_class_port_info *)mad->data;
        memset(cif, 0, sizeof *cif);
        cif->base_version = 1;
        cif->class_version = 1;
        cif->resp_time_value = 20;

        mad->mad_hdr.status = 0;
}

/*
 * Write IOUnitInfo to mad. See also section 16.3.3.3 IOUnitInfo in the
 * InfiniBand Architecture Specification. See also section B.7,
 * table B.6 in the T10 SRP r16a document.
 */
static void srpt_get_iou(struct ib_dm_mad *mad)
{
        struct ib_dm_iou_info *ioui;
        u8 slot;
        int i;

        ioui = (struct ib_dm_iou_info *)mad->data;
        ioui->change_id = 1;
        ioui->max_controllers = 16;

        /* set present for slot 1 and empty for the rest */
        srpt_set_ioc(ioui->controller_list, 1, 1);
        for (i = 1, slot = 2; i < 16; i++, slot++)
                srpt_set_ioc(ioui->controller_list, slot, 0);

        mad->mad_hdr.status = 0;
}

/*
 * Write IOControllerProfile to mad for I/O controller (sdev, slot). See also
 * section 16.3.3.4 IOControllerProfile in the InfiniBand Architecture
 * Specification. See also section B.7, table B.7 in the T10 SRP r16a
 * document.
 */
static void srpt_get_ioc(struct srpt_device *sdev, u32 slot,
                         struct ib_dm_mad *mad)
{
        struct ib_dm_ioc_profile *iocp;

        iocp = (struct ib_dm_ioc_profile *)mad->data;

        if (!slot || slot > 16) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
                return;
        }

        if (slot > 2) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
                return;
        }

        memset(iocp, 0, sizeof *iocp);
        strcpy(iocp->id_string, MELLANOX_SRPT_ID_STRING);
        iocp->guid = cpu_to_be64(mellanox_ioc_guid);
        iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
        iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id);
        iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver);
        iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
        iocp->subsys_device_id = 0x0;
        iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
        iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
        iocp->protocol = cpu_to_be16(SRP_PROTOCOL);
        iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION);
        iocp->send_queue_depth = cpu_to_be16(SRPT_SRQ_SIZE);
        iocp->rdma_read_depth = 4;
        iocp->send_size = cpu_to_be32(MAX_MESSAGE_SIZE);
        iocp->rdma_size = cpu_to_be32(MAX_RDMA_SIZE);
        iocp->num_svc_entries = 1;
        iocp->op_cap_mask = SRP_SEND_TO_IOC | SRP_SEND_FROM_IOC |
            SRP_RDMA_READ_FROM_IOC | SRP_RDMA_WRITE_FROM_IOC;

        mad->mad_hdr.status = 0;
}

/*
 * Device management: write ServiceEntries to mad for the given slot. See also
 * section 16.3.3.5 ServiceEntries in the InfiniBand Architecture
 * Specification. See also section B.7, table B.8 in the T10 SRP r16a document.
 */
static void srpt_get_svc_entries(u16 slot, u8 hi, u8 lo, struct ib_dm_mad *mad)
{
        struct ib_dm_svc_entries *svc_entries;

        if (!slot || slot > 16) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
                return;
        }

        if (slot > 2 || lo > hi || hi > 1) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
                return;
        }

        svc_entries = (struct ib_dm_svc_entries *)mad->data;
        memset(svc_entries, 0, sizeof *svc_entries);
        svc_entries->service_entries[0].id = cpu_to_be64(mellanox_ioc_guid);
        snprintf(svc_entries->service_entries[0].name,
                 sizeof(svc_entries->service_entries[0].name),
                 "%s%016llx",
                 SRP_SERVICE_NAME_PREFIX,
                 (unsigned long long)mellanox_ioc_guid);

        mad->mad_hdr.status = 0;
}
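
/*
 * The service name written above has the form "<prefix><16 hex digits>",
 * e.g. "SRP.T10:0002c9020023cb90", assuming that SRP_SERVICE_NAME_PREFIX
 * equals "SRP.T10:" as defined in <scsi/srp.h>.
 */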

/*
 * Actual processing of a MAD *rq_mad received through source port *sp
 * (MAD = InfiniBand management datagram). The response to be sent back is
 * written to *rsp_mad.
 */
static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
                                 struct ib_dm_mad *rsp_mad)
{
        u16 attr_id;
        u32 slot;
        u8 hi, lo;

        attr_id = be16_to_cpu(rq_mad->mad_hdr.attr_id);
        switch (attr_id) {
        case DM_ATTR_CLASS_PORT_INFO:
                srpt_get_class_port_info(rsp_mad);
                break;
        case DM_ATTR_IOU_INFO:
                srpt_get_iou(rsp_mad);
                break;
        case DM_ATTR_IOC_PROFILE:
                slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
                srpt_get_ioc(sp->sdev, slot, rsp_mad);
                break;
        case DM_ATTR_SVC_ENTRIES:
                slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
                hi = (u8) ((slot >> 8) & 0xff);
                lo = (u8) (slot & 0xff);
                slot = (u16) ((slot >> 16) & 0xffff);
                srpt_get_svc_entries(slot, hi, lo, rsp_mad);
                break;
        default:
                rsp_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
                break;
        }
}
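
/*
 * Layout of the attribute modifier for DM_ATTR_SVC_ENTRIES as decoded above:
 * bits 31:16 hold the controller slot number, bits 15:8 the index of the last
 * requested service entry ('hi') and bits 7:0 the index of the first
 * requested service entry ('lo').
 */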

/*
 * Callback function that is called by the InfiniBand core after transmission of
 * a MAD. (MAD = management datagram; AH = address handle.)
 */
static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
                                  struct ib_mad_send_wc *mad_wc)
{
        ib_destroy_ah(mad_wc->send_buf->ah);
        ib_free_send_mad(mad_wc->send_buf);
}

/*
 * Callback function that is called by the InfiniBand core after reception of
 * a MAD (management datagram).
 */
static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
                                  struct ib_mad_recv_wc *mad_wc)
{
        struct srpt_port *sport = (struct srpt_port *)mad_agent->context;
        struct ib_ah *ah;
        struct ib_mad_send_buf *rsp;
        struct ib_dm_mad *dm_mad;

        if (!mad_wc || !mad_wc->recv_buf.mad)
                return;

        ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc,
                                  mad_wc->recv_buf.grh, mad_agent->port_num);
        if (IS_ERR(ah))
                goto err;

        BUILD_BUG_ON(offsetof(struct ib_dm_mad, data) != IB_MGMT_DEVICE_HDR);

        rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp,
                                 mad_wc->wc->pkey_index, 0,
                                 IB_MGMT_DEVICE_HDR, IB_MGMT_DEVICE_DATA,
                                 GFP_KERNEL);
        if (IS_ERR(rsp))
                goto err_rsp;

        rsp->ah = ah;

        dm_mad = rsp->mad;
        memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof *dm_mad);
        dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
        dm_mad->mad_hdr.status = 0;

        switch (mad_wc->recv_buf.mad->mad_hdr.method) {
        case IB_MGMT_METHOD_GET:
                srpt_mgmt_method_get(sport, mad_wc->recv_buf.mad, dm_mad);
                break;
        case IB_MGMT_METHOD_SET:
                dm_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
                break;
        default:
                dm_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD);
                break;
        }

        if (!ib_post_send_mad(rsp, NULL)) {
                ib_free_recv_mad(mad_wc);
                /* will destroy_ah & free_send_mad in send completion */
                return;
        }

        ib_free_send_mad(rsp);

err_rsp:
        ib_destroy_ah(ah);
err:
        ib_free_recv_mad(mad_wc);
}

/*
 * Enable InfiniBand management datagram processing, update the cached sm_lid,
 * lid and gid values, and register a callback function for processing MADs
 * on the specified port. It is safe to call this function more than once for
 * the same port.
 */
static int srpt_refresh_port(struct srpt_port *sport)
{
        struct ib_mad_reg_req reg_req;
        struct ib_port_modify port_modify;
        struct ib_port_attr port_attr;
        int ret;

        memset(&port_modify, 0, sizeof port_modify);
        port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
        port_modify.clr_port_cap_mask = 0;

        ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
        if (ret)
                goto err_mod_port;

        ret = ib_query_port(sport->sdev->device, sport->port, &port_attr);
        if (ret)
                goto err_query_port;

        sport->sm_lid = port_attr.sm_lid;
        sport->lid = port_attr.lid;

        ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid);
        if (ret)
                goto err_query_port;

        if (!sport->mad_agent) {
                memset(&reg_req, 0, sizeof reg_req);
                reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
                reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION;
                set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
                set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);

                sport->mad_agent = ib_register_mad_agent(sport->sdev->device,
                                                         sport->port,
                                                         IB_QPT_GSI,
                                                         &reg_req, 0,
                                                         srpt_mad_send_handler,
                                                         srpt_mad_recv_handler,
                                                         sport);
                if (IS_ERR(sport->mad_agent)) {
                        ret = PTR_ERR(sport->mad_agent);
                        sport->mad_agent = NULL;
                        goto err_query_port;
                }
        }

        return 0;

err_query_port:

        port_modify.set_port_cap_mask = 0;
        port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
        ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);

err_mod_port:

        return ret;
}

/*
 * Unregister the callback function for processing MADs and disable MAD
 * processing for all ports of the specified device. It is safe to call this
 * function more than once for the same device.
 */
static void srpt_unregister_mad_agent(struct srpt_device *sdev)
{
        struct ib_port_modify port_modify = {
                .clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP,
        };
        struct srpt_port *sport;
        int i;

        for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
                sport = &sdev->port[i - 1];
                WARN_ON(sport->port != i);
                if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0)
                        printk(KERN_ERR PFX "disabling MAD processing"
                               " failed.\n");
                if (sport->mad_agent) {
                        ib_unregister_mad_agent(sport->mad_agent);
                        sport->mad_agent = NULL;
                }
        }
}

/*
 * Allocate and initialize an SRPT I/O context structure.
 */
static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev)
{
        struct srpt_ioctx *ioctx;

        ioctx = kmalloc(sizeof *ioctx, GFP_KERNEL);
        if (!ioctx)
                goto out;

        ioctx->buf = kzalloc(MAX_MESSAGE_SIZE, GFP_KERNEL);
        if (!ioctx->buf)
                goto out_free_ioctx;

        ioctx->dma = dma_map_single(sdev->device->dma_device, ioctx->buf,
                                    MAX_MESSAGE_SIZE, DMA_BIDIRECTIONAL);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)
        if (dma_mapping_error(sdev->device->dma_device, ioctx->dma))
#else
        if (dma_mapping_error(ioctx->dma))
#endif
                goto out_free_buf;

        return ioctx;

out_free_buf:
        kfree(ioctx->buf);
out_free_ioctx:
        kfree(ioctx);
out:
        return NULL;
}

/*
 * Deallocate an SRPT I/O context structure.
 */
static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
{
        if (!ioctx)
                return;

        dma_unmap_single(sdev->device->dma_device, ioctx->dma,
                         MAX_MESSAGE_SIZE, DMA_BIDIRECTIONAL);
        kfree(ioctx->buf);
        kfree(ioctx);
}

/*
 * Associate a ring of SRPT I/O context structures with the specified device.
 */
static int srpt_alloc_ioctx_ring(struct srpt_device *sdev)
{
        int i;

        for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
                sdev->ioctx_ring[i] = srpt_alloc_ioctx(sdev);

                if (!sdev->ioctx_ring[i])
                        goto err;

                sdev->ioctx_ring[i]->index = i;
        }

        return 0;

err:
        while (--i >= 0) {
                /* note: >= 0 so that ioctx_ring[0] is freed as well */
                srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
                sdev->ioctx_ring[i] = NULL;
        }
        return -ENOMEM;
}

/* Free the ring of SRPT I/O context structures. */
static void srpt_free_ioctx_ring(struct srpt_device *sdev)
{
        int i;

        for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
                srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
                sdev->ioctx_ring[i] = NULL;
        }
}

/*
 * Post a receive request on the SRQ (shared receive queue) of InfiniBand
 * device 'sdev'.
 */
static int srpt_post_recv(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
{
        struct ib_sge list;
        struct ib_recv_wr wr, *bad_wr;

        wr.wr_id = ioctx->index | SRPT_OP_RECV;

        list.addr = ioctx->dma;
        list.length = MAX_MESSAGE_SIZE;
        list.lkey = sdev->mr->lkey;

        wr.next = NULL;
        wr.sg_list = &list;
        wr.num_sge = 1;

        return ib_post_srq_recv(sdev->srq, &wr, &bad_wr);
}
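
/*
 * Note on work request IDs: receive work requests are tagged by or-ing
 * SRPT_OP_RECV into wr_id. srpt_completion() tests that bit to tell receive
 * completions apart from send and RDMA completions, and masks it off again to
 * recover the index into sdev->ioctx_ring.
 */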

/*
 * Post an IB send request.
 * @ch: RDMA channel to post the send request on.
 * @ioctx: I/O context of the send request.
 * @len: length of the request to be sent in bytes.
 *
 * Returns zero upon success and a non-zero value upon failure.
 */
static int srpt_post_send(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
                          int len)
{
        struct ib_sge list;
        struct ib_send_wr wr, *bad_wr;
        struct srpt_device *sdev = ch->sport->sdev;

        dma_sync_single_for_device(sdev->device->dma_device, ioctx->dma,
                                   MAX_MESSAGE_SIZE, DMA_TO_DEVICE);

        list.addr = ioctx->dma;
        list.length = len;
        list.lkey = sdev->mr->lkey;

        wr.next = NULL;
        wr.wr_id = ioctx->index;
        wr.sg_list = &list;
        wr.num_sge = 1;
        wr.opcode = IB_WR_SEND;
        wr.send_flags = IB_SEND_SIGNALED;

        return ib_post_send(ch->qp, &wr, &bad_wr);
}

static int srpt_get_desc_tbl(struct srpt_ioctx *ioctx, struct srp_cmd *srp_cmd,
                             int *ind)
{
        struct srp_indirect_buf *idb;
        struct srp_direct_buf *db;

        *ind = 0;
        if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) ||
            ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) {
                ioctx->n_rbuf = 1;
                ioctx->rbufs = &ioctx->single_rbuf;

                db = (void *)srp_cmd->add_data;
                memcpy(ioctx->rbufs, db, sizeof *db);
                ioctx->data_len = be32_to_cpu(db->len);
        } else {
                idb = (void *)srp_cmd->add_data;

                ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof *db;

                if (ioctx->n_rbuf >
                    (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
                        *ind = 1;
                        ioctx->n_rbuf = 0;
                        goto out;
                }

                if (ioctx->n_rbuf == 1)
                        ioctx->rbufs = &ioctx->single_rbuf;
                else
                        ioctx->rbufs =
                                kmalloc(ioctx->n_rbuf * sizeof *db, GFP_ATOMIC);
                if (!ioctx->rbufs) {
                        ioctx->n_rbuf = 0;
                        return -ENOMEM;
                }

                db = idb->desc_list;
                memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof *db);
                ioctx->data_len = be32_to_cpu(idb->len);
        }
out:
        return 0;
}
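
/*
 * Layout of the SRP_CMD buf_fmt field as tested above: the upper four bits
 * hold the data-out buffer descriptor format and the lower four bits the
 * data-in buffer descriptor format. SRP_DATA_DESC_DIRECT denotes a single
 * direct descriptor; any other non-zero format is treated as an indirect
 * descriptor table.
 */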

/*
 * Modify the attributes of queue pair 'qp': allow local write, remote read,
 * and remote write. Also transition 'qp' to state IB_QPS_INIT.
 */
static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
{
        struct ib_qp_attr *attr;
        int ret;

        attr = kzalloc(sizeof *attr, GFP_KERNEL);
        if (!attr)
                return -ENOMEM;

        attr->qp_state = IB_QPS_INIT;
        attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
            IB_ACCESS_REMOTE_WRITE;
        attr->port_num = ch->sport->port;
        attr->pkey_index = 0;

        ret = ib_modify_qp(qp, attr,
                           IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PORT |
                           IB_QP_PKEY_INDEX);

        kfree(attr);
        return ret;
}

/**
 * Change the state of a channel to 'ready to receive' (RTR).
 * @ch: channel of the queue pair.
 * @qp: queue pair to change the state of.
 *
 * Returns zero upon success and a negative value upon failure.
 */
static int srpt_ch_qp_rtr(struct srpt_rdma_ch *ch, struct ib_qp *qp)
{
        struct ib_qp_attr qp_attr;
        int attr_mask;
        int ret;

        qp_attr.qp_state = IB_QPS_RTR;
        ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask);
        if (ret)
                goto out;

        qp_attr.max_dest_rd_atomic = 4;

        ret = ib_modify_qp(qp, &qp_attr, attr_mask);

out:
        return ret;
}

/**
 * Change the state of a channel to 'ready to send' (RTS).
 * @ch: channel of the queue pair.
 * @qp: queue pair to change the state of.
 *
 * Returns zero upon success and a negative value upon failure.
 */
static int srpt_ch_qp_rts(struct srpt_rdma_ch *ch, struct ib_qp *qp)
{
        struct ib_qp_attr qp_attr;
        int attr_mask;
        int ret;

        qp_attr.qp_state = IB_QPS_RTS;
        ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask);
        if (ret)
                goto out;

        qp_attr.max_rd_atomic = 4;

        ret = ib_modify_qp(qp, &qp_attr, attr_mask);

out:
        return ret;
}

static void srpt_reset_ioctx(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
{
        int i;

        if (ioctx->n_rdma_ius > 0 && ioctx->rdma_ius) {
                struct rdma_iu *riu = ioctx->rdma_ius;

                for (i = 0; i < ioctx->n_rdma_ius; ++i, ++riu)
                        kfree(riu->sge);
                kfree(ioctx->rdma_ius);
        }

        if (ioctx->n_rbuf > 1)
                kfree(ioctx->rbufs);

        if (srpt_post_recv(ch->sport->sdev, ioctx)) {
                /* we should queue it back to free_ioctx queue */
                printk(KERN_ERR PFX "SRQ post_recv failed - this is serious\n");
        } else
                atomic_inc(&ch->req_lim_delta);
}

static void srpt_abort_scst_cmd(struct srpt_device *sdev,
                                struct scst_cmd *scmnd,
                                bool tell_initiator)
{
        struct srpt_ioctx *ioctx;
        scst_data_direction dir;

        ioctx = scst_cmd_get_tgt_priv(scmnd);
        BUG_ON(!ioctx);
        dir = scst_cmd_get_data_direction(scmnd);
        if (dir != SCST_DATA_NONE) {
                dma_unmap_sg(sdev->device->dma_device,
                             scst_cmd_get_sg(scmnd),
                             scst_cmd_get_sg_cnt(scmnd),
                             scst_to_tgt_dma_dir(dir));

#if 1
                switch (scmnd->state) {
                case SCST_CMD_STATE_DATA_WAIT:
                        WARN_ON(ioctx->state != SRPT_STATE_NEED_DATA);
                        break;
                case SCST_CMD_STATE_XMIT_WAIT:
                        WARN_ON(ioctx->state != SRPT_STATE_PROCESSED);
                        break;
                default:
                        WARN_ON(ioctx->state == SRPT_STATE_NEED_DATA ||
                                ioctx->state == SRPT_STATE_PROCESSED);
                }
#endif

                if (ioctx->state == SRPT_STATE_NEED_DATA) {
                        scst_rx_data(scmnd,
                                     tell_initiator ? SCST_RX_STATUS_ERROR
                                     : SCST_RX_STATUS_ERROR_FATAL,
                                     SCST_CONTEXT_THREAD);
                        goto out;
                } else if (ioctx->state != SRPT_STATE_PROCESSED) {
                        printk(KERN_ERR PFX
                               "unexpected cmd state %d (SCST) %d (SRPT)\n",
                               scmnd->state, ioctx->state);
                        WARN_ON("unexpected cmd state");
                }
        }

        scst_set_delivery_status(scmnd, SCST_CMD_DELIVERY_FAILED);
        scst_tgt_cmd_done(scmnd, scst_estimate_context());
out:
        return;
}

static void srpt_handle_err_comp(struct srpt_rdma_ch *ch, struct ib_wc *wc)
{
        struct srpt_ioctx *ioctx;
        struct srpt_device *sdev = ch->sport->sdev;

        if (wc->wr_id & SRPT_OP_RECV) {
                ioctx = sdev->ioctx_ring[wc->wr_id & ~SRPT_OP_RECV];
                printk(KERN_ERR PFX "This is serious - SRQ is in bad state\n");
        } else {
                ioctx = sdev->ioctx_ring[wc->wr_id];

                if (ioctx->scmnd)
                        srpt_abort_scst_cmd(sdev, ioctx->scmnd, true);
                else
                        srpt_reset_ioctx(ch, ioctx);
        }
}

static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
                                  struct srpt_ioctx *ioctx,
                                  enum scst_exec_context context)
{
        if (ioctx->scmnd) {
                scst_data_direction dir =
                        scst_cmd_get_data_direction(ioctx->scmnd);

                if (dir != SCST_DATA_NONE)
                        dma_unmap_sg(ch->sport->sdev->device->dma_device,
                                     scst_cmd_get_sg(ioctx->scmnd),
                                     scst_cmd_get_sg_cnt(ioctx->scmnd),
                                     scst_to_tgt_dma_dir(dir));

                scst_tgt_cmd_done(ioctx->scmnd, context);
        } else
                srpt_reset_ioctx(ch, ioctx);
}

static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
                                  struct srpt_ioctx *ioctx)
{
        if (!ioctx->scmnd) {
                srpt_reset_ioctx(ch, ioctx);
                return;
        }

        if (scst_cmd_get_data_direction(ioctx->scmnd) == SCST_DATA_WRITE)
                scst_rx_data(ioctx->scmnd, SCST_RX_STATUS_SUCCESS,
                        scst_estimate_context());
}

/**
 * Build an SRP_RSP response PDU.
 * @ch: RDMA channel through which the request has been received.
 * @ioctx: I/O context in which the SRP_RSP PDU will be built.
 * @s_key: sense key that will be stored in the response.
 * @s_code: value that will be stored in the asc_ascq field of the sense data.
 * @tag: tag of the request for which this response is being generated.
 *
 * Returns the size in bytes of the SRP_RSP response PDU.
 *
 * An SRP_RSP PDU contains a SCSI status or service response. See also
 * section 6.9 in the T10 SRP r16a document for the format of an SRP_RSP PDU.
 * See also SPC-2 for more information about sense data.
 */
static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
                              struct srpt_ioctx *ioctx, u8 s_key, u8 s_code,
                              u64 tag)
{
        struct srp_rsp *srp_rsp;
        struct sense_data *sense;
        int limit_delta;
        int sense_data_len = 0;

        srp_rsp = ioctx->buf;
        memset(srp_rsp, 0, sizeof *srp_rsp);

        limit_delta = atomic_read(&ch->req_lim_delta);
        atomic_sub(limit_delta, &ch->req_lim_delta);

        srp_rsp->opcode = SRP_RSP;
        srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
        srp_rsp->tag = tag;

        if (s_key != NO_SENSE) {
                sense_data_len = sizeof *sense + (sizeof *sense % 4);
                srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
                srp_rsp->status = SAM_STAT_CHECK_CONDITION;
                srp_rsp->sense_data_len = cpu_to_be32(sense_data_len);

                sense = (struct sense_data *)(srp_rsp + 1);
                sense->err_code = 0x70;
                sense->key = s_key;
                sense->asc_ascq = s_code;
        }

        return sizeof(*srp_rsp) + sense_data_len;
}
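
/*
 * Note on flow control: ch->req_lim_delta accumulates the receive credits
 * gained since the last response was sent (srpt_reset_ioctx() increments it
 * after every successful repost to the SRQ). Building a response drains the
 * counter and reports the drained amount to the initiator through the
 * req_lim_delta field of the SRP_RSP PDU.
 */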

/**
 * Build a task management response, which is a specific SRP_RSP response PDU.
 * @ch: RDMA channel through which the request has been received.
 * @ioctx: I/O context in which the SRP_RSP PDU will be built.
 * @rsp_code: RSP_CODE that will be stored in the response.
 * @tag: tag of the request for which this response is being generated.
 *
 * Returns the size in bytes of the SRP_RSP response PDU.
 *
 * An SRP_RSP PDU contains a SCSI status or service response. See also
 * section 6.9 in the T10 SRP r16a document for the format of an SRP_RSP PDU.
 */
static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
                                  struct srpt_ioctx *ioctx, u8 rsp_code,
                                  u64 tag)
{
        struct srp_rsp *srp_rsp;
        int limit_delta;
        int resp_data_len = 0;

        dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
                                MAX_MESSAGE_SIZE, DMA_TO_DEVICE);

        srp_rsp = ioctx->buf;
        memset(srp_rsp, 0, sizeof *srp_rsp);

        limit_delta = atomic_read(&ch->req_lim_delta);
        atomic_sub(limit_delta, &ch->req_lim_delta);

        srp_rsp->opcode = SRP_RSP;
        srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
        srp_rsp->tag = tag;

        if (rsp_code != SRP_TSK_MGMT_SUCCESS) {
                resp_data_len = 4;
                srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID;
                srp_rsp->resp_data_len = cpu_to_be32(resp_data_len);
                srp_rsp->data[3] = rsp_code;
        }

        return sizeof(*srp_rsp) + resp_data_len;
}

/*
 * Process SRP_CMD.
 */
static int srpt_handle_cmd(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
{
        struct scst_cmd *scmnd;
        struct srp_cmd *srp_cmd;
        struct srp_rsp *srp_rsp;
        scst_data_direction dir = SCST_DATA_NONE;
        int indirect_desc = 0;
        int ret;
        unsigned long flags;

        srp_cmd = ioctx->buf;
        srp_rsp = ioctx->buf;

        if (srp_cmd->buf_fmt) {
                ret = srpt_get_desc_tbl(ioctx, srp_cmd, &indirect_desc);
                if (ret) {
                        srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
                                           NO_ADD_SENSE, srp_cmd->tag);
                        srp_rsp->status = SAM_STAT_TASK_SET_FULL;
                        goto err;
                }

                if (indirect_desc) {
                        srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
                                           NO_ADD_SENSE, srp_cmd->tag);
                        srp_rsp->status = SAM_STAT_TASK_SET_FULL;
                        goto err;
                }

                if (srp_cmd->buf_fmt & 0xf)
                        dir = SCST_DATA_READ;
                else if (srp_cmd->buf_fmt >> 4)
                        dir = SCST_DATA_WRITE;
                else
                        dir = SCST_DATA_NONE;
        } else
                dir = SCST_DATA_NONE;

        scmnd = scst_rx_cmd(ch->scst_sess, (u8 *) &srp_cmd->lun,
                            sizeof srp_cmd->lun, srp_cmd->cdb, 16,
                            thread ? SCST_NON_ATOMIC : SCST_ATOMIC);
        if (!scmnd) {
                srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
                                   NO_ADD_SENSE, srp_cmd->tag);
                srp_rsp->status = SAM_STAT_TASK_SET_FULL;
                goto err;
        }

        ioctx->scmnd = scmnd;

        switch (srp_cmd->task_attr) {
        case SRP_CMD_HEAD_OF_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_HEAD_OF_QUEUE;
                break;
        case SRP_CMD_ORDERED_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
                break;
        case SRP_CMD_SIMPLE_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_SIMPLE;
                break;
        case SRP_CMD_ACA:
                scmnd->queue_type = SCST_CMD_QUEUE_ACA;
                break;
        default:
                scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
                break;
        }

        scst_cmd_set_tag(scmnd, srp_cmd->tag);
        scst_cmd_set_tgt_priv(scmnd, ioctx);
        scst_cmd_set_expected(scmnd, dir, ioctx->data_len);

        spin_lock_irqsave(&ch->spinlock, flags);
        list_add_tail(&ioctx->scmnd_list, &ch->active_scmnd_list);
        ch->active_scmnd_cnt++;
        spin_unlock_irqrestore(&ch->spinlock, flags);

        scst_cmd_init_done(scmnd, scst_estimate_context());

        return 0;

err:
        WARN_ON(srp_rsp->opcode != SRP_RSP);

        return -1;
}

/*
 * Process an SRP_TSK_MGMT request PDU.
 *
 * Returns 0 upon success and -1 upon failure.
 *
 * Each task management function is performed by calling one of the
 * scst_rx_mgmt_fn*() functions. These functions will either report failure
 * or process the task management function asynchronously. The function
 * srpt_tsk_mgmt_done() will be called by the SCST core upon completion of the
 * task management function. When srpt_handle_tsk_mgmt() reports failure
 * (i.e. returns -1) a response PDU will have been built in ioctx->buf. This
 * PDU has to be sent back by the caller.
 *
 * For more information about SRP_TSK_MGMT PDUs, see also section 6.7 in
 * the T10 SRP r16a document.
 */
static int srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
                                struct srpt_ioctx *ioctx)
{
        struct srp_tsk_mgmt *srp_tsk;
        struct srpt_mgmt_ioctx *mgmt_ioctx;
        int ret;

        srp_tsk = ioctx->buf;

        TRACE_DBG("recv_tsk_mgmt= %d for task_tag= %lld"
                  " using tag= %lld cm_id= %p sess= %p",
                  srp_tsk->tsk_mgmt_func,
                  (unsigned long long) srp_tsk->task_tag,
                  (unsigned long long) srp_tsk->tag,
                  ch->cm_id, ch->scst_sess);

        mgmt_ioctx = kmalloc(sizeof *mgmt_ioctx, GFP_ATOMIC);
        if (!mgmt_ioctx) {
                srpt_build_tskmgmt_rsp(ch, ioctx, SRP_TSK_MGMT_FAILED,
                                       srp_tsk->tag);
                goto err;
        }

        mgmt_ioctx->ioctx = ioctx;
        mgmt_ioctx->ch = ch;
        mgmt_ioctx->tag = srp_tsk->tag;

        switch (srp_tsk->tsk_mgmt_func) {
        case SRP_TSK_ABORT_TASK:
                TRACE_DBG("%s", "Processing SRP_TSK_ABORT_TASK");
                ret = scst_rx_mgmt_fn_tag(ch->scst_sess,
                                          SCST_ABORT_TASK,
                                          srp_tsk->task_tag,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_ABORT_TASK_SET:
                TRACE_DBG("%s", "Processing SRP_TSK_ABORT_TASK_SET");
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_ABORT_TASK_SET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_CLEAR_TASK_SET:
                TRACE_DBG("%s", "Processing SRP_TSK_CLEAR_TASK_SET");
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_CLEAR_TASK_SET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_LUN_RESET:
                TRACE_DBG("%s", "Processing SRP_TSK_LUN_RESET");
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_LUN_RESET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_CLEAR_ACA:
                TRACE_DBG("%s", "Processing SRP_TSK_CLEAR_ACA");
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_CLEAR_ACA,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        default:
                TRACE_DBG("%s", "Unsupported task management function.");
                srpt_build_tskmgmt_rsp(ch, ioctx,
                                       SRP_TSK_MGMT_FUNC_NOT_SUPP,
                                       srp_tsk->tag);
                goto err;
        }

        if (ret) {
                TRACE_DBG("%s", "Processing task management function failed.");
                srpt_build_tskmgmt_rsp(ch, ioctx, SRP_TSK_MGMT_FAILED,
                                       srp_tsk->tag);
                goto err;
        }

        WARN_ON(srp_tsk->opcode == SRP_RSP);

        return 0;

err:
        WARN_ON(srp_tsk->opcode != SRP_RSP);

        kfree(mgmt_ioctx);
        return -1;
}

/**
 * Process a receive completion event.
 * @ch: RDMA channel for which the completion event has been received.
 * @ioctx: SRPT I/O context for which the completion event has been received.
 */
static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
                               struct srpt_ioctx *ioctx)
{
        struct srp_cmd *srp_cmd;
        struct srp_rsp *srp_rsp;
        unsigned long flags;
        int len;

        spin_lock_irqsave(&ch->spinlock, flags);
        if (ch->state != RDMA_CHANNEL_LIVE) {
                if (ch->state == RDMA_CHANNEL_CONNECTING) {
                        list_add_tail(&ioctx->wait_list, &ch->cmd_wait_list);
                        spin_unlock_irqrestore(&ch->spinlock, flags);
                        return;
                } else {
                        spin_unlock_irqrestore(&ch->spinlock, flags);
                        srpt_reset_ioctx(ch, ioctx);
                        return;
                }
        }
        spin_unlock_irqrestore(&ch->spinlock, flags);

        dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
                                MAX_MESSAGE_SIZE, DMA_FROM_DEVICE);

        ioctx->data_len = 0;
        ioctx->n_rbuf = 0;
        ioctx->rbufs = NULL;
        ioctx->n_rdma = 0;
        ioctx->n_rdma_ius = 0;
        ioctx->rdma_ius = NULL;
        ioctx->scmnd = NULL;
        ioctx->state = SRPT_STATE_NEW;

        srp_cmd = ioctx->buf;
        srp_rsp = ioctx->buf;

        switch (srp_cmd->opcode) {
        case SRP_CMD:
                if (srpt_handle_cmd(ch, ioctx) < 0)
                        goto err;
                break;

        case SRP_TSK_MGMT:
                if (srpt_handle_tsk_mgmt(ch, ioctx) < 0)
                        goto err;
                break;

        case SRP_I_LOGOUT:
        case SRP_AER_REQ:
        default:
                srpt_build_cmd_rsp(ch, ioctx, ILLEGAL_REQUEST, INVALID_CDB,
                                   srp_cmd->tag);
                goto err;
        }

        dma_sync_single_for_device(ch->sport->sdev->device->dma_device,
                                   ioctx->dma, MAX_MESSAGE_SIZE,
                                   DMA_FROM_DEVICE);

        return;

err:
        WARN_ON(srp_rsp->opcode != SRP_RSP);
        len = (sizeof *srp_rsp) + be32_to_cpu(srp_rsp->sense_data_len);

        if (ch->state != RDMA_CHANNEL_LIVE) {
                /* Give up if another thread modified the channel state. */
                printk(KERN_ERR PFX "%s: channel is in state %d\n",
                       __func__, ch->state);
                srpt_reset_ioctx(ch, ioctx);
        } else if (srpt_post_send(ch, ioctx, len)) {
                printk(KERN_ERR PFX "%s: sending SRP_RSP PDU failed\n",
                       __func__);
                srpt_reset_ioctx(ch, ioctx);
        }
}

/*
 * Returns true if the ioctx list is non-empty or if the ib_srpt kernel thread
 * should stop.
 * @pre thread != 0
 */
static inline int srpt_test_ioctx_list(void)
{
        int res = (!list_empty(&srpt_thread.thread_ioctx_list) ||
                   unlikely(kthread_should_stop()));
        return res;
}

/*
 * Add 'ioctx' to the tail of the ioctx list and wake up the kernel thread.
 *
 * @pre thread != 0
 */
static inline void srpt_schedule_thread(struct srpt_ioctx *ioctx)
{
        unsigned long flags;

        spin_lock_irqsave(&srpt_thread.thread_lock, flags);
        list_add_tail(&ioctx->comp_list, &srpt_thread.thread_ioctx_list);
        spin_unlock_irqrestore(&srpt_thread.thread_lock, flags);
        wake_up(&ioctx_list_waitQ);
}

/**
 * InfiniBand completion queue callback function.
 * @cq: completion queue.
 * @ctx: completion queue context, which was passed as the fourth argument of
 *       the function ib_create_cq().
 */
static void srpt_completion(struct ib_cq *cq, void *ctx)
{
        struct srpt_rdma_ch *ch = ctx;
        struct srpt_device *sdev = ch->sport->sdev;
        struct ib_wc wc;
        struct srpt_ioctx *ioctx;

        ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
        while (ib_poll_cq(ch->cq, 1, &wc) > 0) {
                if (wc.status) {
                        printk(KERN_ERR PFX "failed %s status= %d\n",
                               wc.wr_id & SRPT_OP_RECV ? "receive" : "send",
                               wc.status);
                        srpt_handle_err_comp(ch, &wc);
                        break;
                }

                if (wc.wr_id & SRPT_OP_RECV) {
                        ioctx = sdev->ioctx_ring[wc.wr_id & ~SRPT_OP_RECV];
                        if (thread) {
                                ioctx->ch = ch;
                                ioctx->op = IB_WC_RECV;
                                srpt_schedule_thread(ioctx);
                        } else
                                srpt_handle_new_iu(ch, ioctx);
                        continue;
                } else
                        ioctx = sdev->ioctx_ring[wc.wr_id];

                if (thread) {
                        ioctx->ch = ch;
                        ioctx->op = wc.opcode;
                        srpt_schedule_thread(ioctx);
                } else {
                        switch (wc.opcode) {
                        case IB_WC_SEND:
                                srpt_handle_send_comp(ch, ioctx,
                                        scst_estimate_context());
                                break;
                        case IB_WC_RDMA_WRITE:
                        case IB_WC_RDMA_READ:
                                srpt_handle_rdma_comp(ch, ioctx);
                                break;
                        default:
                                break;
                        }
                }
        }
}
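
/*
 * Note: ib_req_notify_cq() re-arms the completion queue before the poll loop
 * above drains it. Completions that arrive after the final ib_poll_cq() call
 * hence trigger a new invocation of this handler instead of being missed.
 */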

/*
 * Create a completion queue and a queue pair for the specified RDMA channel.
 */
static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
{
        struct ib_qp_init_attr *qp_init;
        struct srpt_device *sdev = ch->sport->sdev;
        int cqe;
        int ret;

        qp_init = kzalloc(sizeof *qp_init, GFP_KERNEL);
        if (!qp_init)
                return -ENOMEM;

        /* Create a completion queue (CQ). */

        cqe = SRPT_RQ_SIZE + SRPT_SQ_SIZE - 1;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(RHEL_RELEASE_CODE)
        ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe);
#else
        ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe, 0);
#endif
        if (IS_ERR(ch->cq)) {
                ret = PTR_ERR(ch->cq);
                printk(KERN_ERR PFX "failed to create_cq cqe= %d ret= %d\n",
                        cqe, ret);
                goto out;
        }

        /* Request completion notification. */

        ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);

        /* Create a queue pair (QP). */

        qp_init->qp_context = (void *)ch;
        qp_init->event_handler = srpt_qp_event;
        qp_init->send_cq = ch->cq;
        qp_init->recv_cq = ch->cq;
        qp_init->srq = sdev->srq;
        qp_init->sq_sig_type = IB_SIGNAL_REQ_WR;
        qp_init->qp_type = IB_QPT_RC;
        qp_init->cap.max_send_wr = SRPT_SQ_SIZE;
        qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE;

        ch->qp = ib_create_qp(sdev->pd, qp_init);
        if (IS_ERR(ch->qp)) {
                ret = PTR_ERR(ch->qp);
                ib_destroy_cq(ch->cq);
                printk(KERN_ERR PFX "failed to create_qp ret= %d\n", ret);
                goto out;
        }

        TRACE_DBG("%s: max_cqe= %d max_sge= %d cm_id= %p",
               __func__, ch->cq->cqe, qp_init->cap.max_send_sge,
               ch->cm_id);

        /* Modify the attributes and the state of queue pair ch->qp. */

        ret = srpt_init_ch_qp(ch, ch->qp);
        if (ret) {
                ib_destroy_qp(ch->qp);
                ib_destroy_cq(ch->cq);
                goto out;
        }

        atomic_set(&ch->req_lim_delta, SRPT_RQ_SIZE);
out:
        kfree(qp_init);
        return ret;
}
1499
/**
 * Look up the RDMA channel that corresponds to the specified cm_id.
 *
 * If del is true, the matching channel is also removed from the channel list.
 * Return NULL if no matching RDMA channel has been found.
 */
static struct srpt_rdma_ch *srpt_find_channel(struct ib_cm_id *cm_id, bool del)
{
	struct srpt_device *sdev = cm_id->context;
	struct srpt_rdma_ch *ch;

	spin_lock_irq(&sdev->spinlock);
	list_for_each_entry(ch, &sdev->rch_list, list) {
		if (ch->cm_id == cm_id) {
			if (del)
				list_del(&ch->list);
			spin_unlock_irq(&sdev->spinlock);
			return ch;
		}
	}

	spin_unlock_irq(&sdev->spinlock);

	return NULL;
}

/**
 * Release all resources associated with the specified RDMA channel.
 *
 * Note: the caller must have removed the channel from the channel list
 * before calling this function.
 */
static void srpt_release_channel(struct srpt_rdma_ch *ch, int destroy_cmid)
{
	TRACE_ENTRY();

	WARN_ON(srpt_find_channel(ch->cm_id, false) == ch);

	if (ch->cm_id && destroy_cmid) {
		TRACE_DBG("%s: destroy cm_id= %p", __func__, ch->cm_id);
		ib_destroy_cm_id(ch->cm_id);
		ch->cm_id = NULL;
	}

	ib_destroy_qp(ch->qp);
	ib_destroy_cq(ch->cq);

	if (ch->scst_sess) {
		struct srpt_ioctx *ioctx, *ioctx_tmp;

		TRACE_DBG("%s: release sess= %p sess_name= %s active_cmd= %d",
			  __func__, ch->scst_sess, ch->sess_name,
			  ch->active_scmnd_cnt);

		spin_lock_irq(&ch->spinlock);
		list_for_each_entry_safe(ioctx, ioctx_tmp,
					 &ch->active_scmnd_list, scmnd_list) {
			spin_unlock_irq(&ch->spinlock);

			if (ioctx->scmnd)
				srpt_abort_scst_cmd(ch->sport->sdev,
						    ioctx->scmnd, true);

			spin_lock_irq(&ch->spinlock);
		}
		WARN_ON(!list_empty(&ch->active_scmnd_list));
		WARN_ON(ch->active_scmnd_cnt != 0);
		spin_unlock_irq(&ch->spinlock);

		scst_unregister_session(ch->scst_sess, 0, NULL);
		ch->scst_sess = NULL;
	}

	kfree(ch);

	TRACE_EXIT();
}

static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
			    struct ib_cm_req_event_param *param,
			    void *private_data)
{
	struct srpt_device *sdev = cm_id->context;
	struct srp_login_req *req;
	struct srp_login_rsp *rsp;
	struct srp_login_rej *rej;
	struct ib_cm_rep_param *rep_param;
	struct srpt_rdma_ch *ch, *tmp_ch;
	u32 it_iu_len;
	int ret = 0;

	if (!sdev || !private_data)
		return -EINVAL;

	rsp = kzalloc(sizeof *rsp, GFP_KERNEL);
	rej = kzalloc(sizeof *rej, GFP_KERNEL);
	rep_param = kzalloc(sizeof *rep_param, GFP_KERNEL);

	if (!rsp || !rej || !rep_param) {
		ret = -ENOMEM;
		goto out;
	}

	req = (struct srp_login_req *)private_data;

	it_iu_len = be32_to_cpu(req->req_it_iu_len);

	TRACE_DBG("Host login i_port_id=0x%llx:0x%llx t_port_id=0x%llx:0x%llx"
	    " it_iu_len=%d",
	    (unsigned long long)be64_to_cpu(*(u64 *)&req->initiator_port_id[0]),
	    (unsigned long long)be64_to_cpu(*(u64 *)&req->initiator_port_id[8]),
	    (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[0]),
	    (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[8]),
	    it_iu_len);

	if (it_iu_len > MAX_MESSAGE_SIZE || it_iu_len < 64) {
		rej->reason =
		    cpu_to_be32(SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
		ret = -EINVAL;
		TRACE_DBG("Reject invalid it_iu_len=%d", it_iu_len);
		goto reject;
	}

	if ((req->req_flags & 0x3) == SRP_MULTICHAN_SINGLE) {
		rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;

		spin_lock_irq(&sdev->spinlock);

		list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
			if (!memcmp(ch->i_port_id, req->initiator_port_id, 16)
			    && !memcmp(ch->t_port_id, req->target_port_id, 16)
			    && param->port == ch->sport->port
			    && param->listen_id == ch->sport->sdev->cm_id
			    && ch->cm_id) {
				enum rdma_ch_state prev_state;

				/* found an existing channel */
				TRACE_DBG("Found existing channel name= %s"
					  " cm_id= %p state= %d",
					  ch->sess_name, ch->cm_id, ch->state);

				prev_state = ch->state;
				if (ch->state == RDMA_CHANNEL_LIVE)
					ch->state = RDMA_CHANNEL_DISCONNECTING;
				else if (ch->state == RDMA_CHANNEL_CONNECTING)
					list_del(&ch->list);

				spin_unlock_irq(&sdev->spinlock);

				rsp->rsp_flags =
					SRP_LOGIN_RSP_MULTICHAN_TERMINATED;

				if (prev_state == RDMA_CHANNEL_LIVE)
					ib_send_cm_dreq(ch->cm_id, NULL, 0);
				else if (prev_state ==
					 RDMA_CHANNEL_CONNECTING) {
					ib_send_cm_rej(ch->cm_id,
						       IB_CM_REJ_NO_RESOURCES,
						       NULL, 0, NULL, 0);
					srpt_release_channel(ch, 1);
				}

				spin_lock_irq(&sdev->spinlock);
			}
		}

		spin_unlock_irq(&sdev->spinlock);

	} else
		rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;

	if (((u64) (*(u64 *) req->target_port_id) !=
	     cpu_to_be64(mellanox_ioc_guid)) ||
	    ((u64) (*(u64 *) (req->target_port_id + 8)) !=
	     cpu_to_be64(mellanox_ioc_guid))) {
		rej->reason =
		    cpu_to_be32(SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
		ret = -ENOMEM;
		TRACE_DBG("%s", "Reject invalid target_port_id");
		goto reject;
	}

	ch = kzalloc(sizeof *ch, GFP_KERNEL);
	if (!ch) {
		rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
		TRACE_DBG("%s", "Reject failed allocate rdma_ch");
		ret = -ENOMEM;
		goto reject;
	}

	spin_lock_init(&ch->spinlock);
	memcpy(ch->i_port_id, req->initiator_port_id, 16);
	memcpy(ch->t_port_id, req->target_port_id, 16);
	ch->sport = &sdev->port[param->port - 1];
	ch->cm_id = cm_id;
	ch->state = RDMA_CHANNEL_CONNECTING;
	INIT_LIST_HEAD(&ch->cmd_wait_list);
	INIT_LIST_HEAD(&ch->active_scmnd_list);

	ret = srpt_create_ch_ib(ch);
	if (ret) {
		rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
		TRACE_DBG("%s", "Reject failed to create rdma_ch");
		goto free_ch;
	}

	ret = srpt_ch_qp_rtr(ch, ch->qp);
	if (ret) {
		rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
		TRACE_DBG("Reject failed qp to rtr/rts ret=%d", ret);
		goto destroy_ib;
	}

	snprintf(ch->sess_name, sizeof(ch->sess_name),
		 "0x%016llx%016llx",
		 (unsigned long long)be64_to_cpu(*(u64 *)ch->i_port_id),
		 (unsigned long long)be64_to_cpu(*(u64 *)(ch->i_port_id + 8)));

	TRACE_DBG("registering session %s", ch->sess_name);

	BUG_ON(!sdev->scst_tgt);
	ch->scst_sess = scst_register_session(sdev->scst_tgt, 0, ch->sess_name,
				  NULL, NULL);
	if (!ch->scst_sess) {
		rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
		TRACE_DBG("%s", "Failed to create scst sess");
		goto destroy_ib;
	}

	TRACE_DBG("Establish connection sess=%p name=%s cm_id=%p",
		  ch->scst_sess, ch->sess_name, ch->cm_id);

	scst_sess_set_tgt_priv(ch->scst_sess, ch);

	/* create srp_login_response */
	rsp->opcode = SRP_LOGIN_RSP;
	rsp->tag = req->tag;
	rsp->max_it_iu_len = req->req_it_iu_len;
	rsp->max_ti_iu_len = req->req_it_iu_len;
	rsp->buf_fmt =
	    cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
	rsp->req_lim_delta = cpu_to_be32(SRPT_RQ_SIZE);
	atomic_set(&ch->req_lim_delta, 0);

	/* create cm reply */
	rep_param->qp_num = ch->qp->qp_num;
	rep_param->private_data = (void *)rsp;
	rep_param->private_data_len = sizeof *rsp;
	rep_param->rnr_retry_count = 7;
	rep_param->flow_control = 1;
	rep_param->failover_accepted = 0;
	rep_param->srq = 1;
	rep_param->responder_resources = 4;
	rep_param->initiator_depth = 4;
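	/*
	 * Note: per the IB CM, responder_resources bounds the number of
	 * outstanding RDMA read/atomic operations the initiator may have in
	 * flight towards this target, and initiator_depth bounds the number
	 * this side may originate; 4 appears to be a conservative fixed
	 * choice here.
	 */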

	ret = ib_send_cm_rep(cm_id, rep_param);
	if (ret)
		goto release_channel;

	spin_lock_irq(&sdev->spinlock);
	list_add_tail(&ch->list, &sdev->rch_list);
	spin_unlock_irq(&sdev->spinlock);

	goto out;

release_channel:
	scst_unregister_session(ch->scst_sess, 0, NULL);
	ch->scst_sess = NULL;

destroy_ib:
	ib_destroy_qp(ch->qp);
	ib_destroy_cq(ch->cq);

free_ch:
	kfree(ch);

reject:
	rej->opcode = SRP_LOGIN_REJ;
	rej->tag = req->tag;
	rej->buf_fmt =
	    cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);

	ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
			     (void *)rej, sizeof *rej);

out:
	kfree(rep_param);
	kfree(rsp);
	kfree(rej);

	return ret;
}

/**
 * Look up and release the channel with the specified cm_id.
 *
 * Note: the cm_id itself is left intact; the callers of this function return
 * a non-zero value from srpt_cm_handler(), which makes the IB CM core destroy
 * the cm_id.
 */
static void srpt_find_and_release_channel(struct ib_cm_id *cm_id)
{
	struct srpt_rdma_ch *ch;

	ch = srpt_find_channel(cm_id, true);
	if (ch)
		srpt_release_channel(ch, 0);
}

static void srpt_cm_rej_recv(struct ib_cm_id *cm_id)
{
	TRACE_DBG("%s: cm_id=%p", __func__, cm_id);
	srpt_find_and_release_channel(cm_id);
}

/**
 * Process an IB_CM_RTU_RECEIVED or IB_CM_USER_ESTABLISHED event.
 *
 * An IB_CM_RTU_RECEIVED message indicates that the connection is established
 * and that the recipient may begin transmitting (RTU = ready to use).
 */
static int srpt_cm_rtu_recv(struct ib_cm_id *cm_id)
{
	struct srpt_rdma_ch *ch;
	int ret;

	ch = srpt_find_channel(cm_id, false);
	if (!ch)
		return -EINVAL;

	if (srpt_test_and_set_channel_state(ch, RDMA_CHANNEL_CONNECTING,
					    RDMA_CHANNEL_LIVE)) {
		struct srpt_ioctx *ioctx, *ioctx_tmp;

		ret = srpt_ch_qp_rts(ch, ch->qp);

		list_for_each_entry_safe(ioctx, ioctx_tmp, &ch->cmd_wait_list,
					 wait_list) {
			list_del(&ioctx->wait_list);
			srpt_handle_new_iu(ch, ioctx);
		}
		if (ret && srpt_test_and_set_channel_state(ch,
					RDMA_CHANNEL_LIVE,
					RDMA_CHANNEL_DISCONNECTING)) {
			TRACE_DBG("cm_id=%p sess_name=%s state=%d",
				  cm_id, ch->sess_name, ch->state);
			ib_send_cm_dreq(ch->cm_id, NULL, 0);
		}
	} else if (ch->state == RDMA_CHANNEL_DISCONNECTING) {
		TRACE_DBG("cm_id=%p sess_name=%s state=%d",
			  cm_id, ch->sess_name, ch->state);
		ib_send_cm_dreq(ch->cm_id, NULL, 0);
		ret = -EAGAIN;
	} else
		ret = 0;

	return ret;
}

static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
{
	TRACE_DBG("%s: cm_id=%p", __func__, cm_id);
	srpt_find_and_release_channel(cm_id);
}

static void srpt_cm_rep_error(struct ib_cm_id *cm_id)
{
	TRACE_DBG("%s: cm_id=%p", __func__, cm_id);
	srpt_find_and_release_channel(cm_id);
}

static int srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
{
	struct srpt_rdma_ch *ch;

	ch = srpt_find_channel(cm_id, false);
	if (!ch)
		return -EINVAL;

	TRACE_DBG("%s: cm_id= %p ch->state= %d",
		 __func__, cm_id, ch->state);

	switch (ch->state) {
	case RDMA_CHANNEL_LIVE:
	case RDMA_CHANNEL_CONNECTING:
		ib_send_cm_drep(ch->cm_id, NULL, 0);
		break;
	case RDMA_CHANNEL_DISCONNECTING:
	default:
		break;
	}

	return 0;
}

static void srpt_cm_drep_recv(struct ib_cm_id *cm_id)
{
	TRACE_DBG("%s: cm_id=%p", __func__, cm_id);
	srpt_find_and_release_channel(cm_id);
}

/**
 * IB connection manager callback function.
 *
 * A non-zero return value will make the caller destroy the CM ID.
 *
 * Note: srpt_add_one passes a struct srpt_device* as the third argument to
 * the ib_create_cm_id() call.
 */
static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
{
	int ret = 0;

	switch (event->event) {
	case IB_CM_REQ_RECEIVED:
		ret = srpt_cm_req_recv(cm_id, &event->param.req_rcvd,
				       event->private_data);
		break;
	case IB_CM_REJ_RECEIVED:
		srpt_cm_rej_recv(cm_id);
		ret = -EINVAL;
		break;
	case IB_CM_RTU_RECEIVED:
	case IB_CM_USER_ESTABLISHED:
		ret = srpt_cm_rtu_recv(cm_id);
		break;
	case IB_CM_DREQ_RECEIVED:
		ret = srpt_cm_dreq_recv(cm_id);
		break;
	case IB_CM_DREP_RECEIVED:
		srpt_cm_drep_recv(cm_id);
		ret = -EINVAL;
		break;
	case IB_CM_TIMEWAIT_EXIT:
		srpt_cm_timewait_exit(cm_id);
		ret = -EINVAL;
		break;
	case IB_CM_REP_ERROR:
		srpt_cm_rep_error(cm_id);
		ret = -EINVAL;
		break;
	default:
		break;
	}

	return ret;
}

static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
				 struct srpt_ioctx *ioctx,
				 struct scst_cmd *scmnd)
{
	struct scatterlist *scat;
	scst_data_direction dir;
	struct rdma_iu *riu;
	struct srp_direct_buf *db;
	dma_addr_t dma_addr;
	struct ib_sge *sge;
	u64 raddr;
	u32 rsize;
	u32 tsize;
	u32 dma_len;
	int count, nrdma;
	int i, j, k;

	scat = scst_cmd_get_sg(scmnd);
	dir = scst_cmd_get_data_direction(scmnd);
	count = dma_map_sg(ch->sport->sdev->device->dma_device, scat,
			   scst_cmd_get_sg_cnt(scmnd),
			   scst_to_tgt_dma_dir(dir));
	if (unlikely(!count))
		return -EBUSY;

	if (ioctx->rdma_ius && ioctx->n_rdma_ius)
		nrdma = ioctx->n_rdma_ius;
	else {
		nrdma = count / SRPT_DEF_SG_PER_WQE + ioctx->n_rbuf;

		ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu,
					  scst_cmd_atomic(scmnd)
					  ? GFP_ATOMIC : GFP_KERNEL);
		if (!ioctx->rdma_ius) {
			dma_unmap_sg(ch->sport->sdev->device->dma_device,
				     scat, scst_cmd_get_sg_cnt(scmnd),
				     scst_to_tgt_dma_dir(dir));
			return -ENOMEM;
		}

		ioctx->n_rdma_ius = nrdma;
	}

	db = ioctx->rbufs;
	tsize = (dir == SCST_DATA_READ) ?
		scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
	dma_len = sg_dma_len(&scat[0]);
	riu = ioctx->rdma_ius;

	/*
	 * For each remote descriptor, calculate the number of ib_sge entries
	 * needed. If a descriptor needs at most SRPT_DEF_SG_PER_WQE ib_sge
	 * entries, a single rdma_iu (i.e. one RDMA work request) suffices;
	 * otherwise extra rdma_iu entries are allocated so that the remaining
	 * ib_sge entries can be carried by additional RDMA work requests.
	 */
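	/*
	 * Illustration (hypothetical numbers): if SRPT_DEF_SG_PER_WQE were
	 * 16, a remote descriptor that requires 40 ib_sge entries would be
	 * split across three rdma_iu entries carrying 16, 16 and 8 of them,
	 * i.e. three RDMA work requests.
	 */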
	for (i = 0, j = 0;
	     j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
		rsize = be32_to_cpu(db->len);
		raddr = be64_to_cpu(db->va);
		riu->raddr = raddr;
		riu->rkey = be32_to_cpu(db->key);
		riu->sge_cnt = 0;

		/* calculate how many sge required for this remote_buf */
		while (rsize > 0 && tsize > 0) {

			if (rsize >= dma_len) {
				tsize -= dma_len;
				rsize -= dma_len;
				raddr += dma_len;

				if (tsize > 0) {
					++j;
					if (j < count)
						dma_len = sg_dma_len(&scat[j]);
				}
			} else {
				tsize -= rsize;
				dma_len -= rsize;
				rsize = 0;
			}

			++riu->sge_cnt;

			if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) {
				riu->sge =
				    kmalloc(riu->sge_cnt * sizeof *riu->sge,
					    scst_cmd_atomic(scmnd)
					    ? GFP_ATOMIC : GFP_KERNEL);
				if (!riu->sge)
					goto free_mem;

				++ioctx->n_rdma;
				++riu;
				riu->sge_cnt = 0;
				riu->raddr = raddr;
				riu->rkey = be32_to_cpu(db->key);
			}
		}

		riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge,
				   scst_cmd_atomic(scmnd)
				   ? GFP_ATOMIC : GFP_KERNEL);

		if (!riu->sge)
			goto free_mem;

		++ioctx->n_rdma;
	}

	db = ioctx->rbufs;
	scat = scst_cmd_get_sg(scmnd);
	tsize = (dir == SCST_DATA_READ) ?
		scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
	riu = ioctx->rdma_ius;
	dma_len = sg_dma_len(&scat[0]);
	dma_addr = sg_dma_address(&scat[0]);

	/*
	 * This second loop maps the DMA addresses of the scatterlist entries
	 * onto the ib_sge arrays (rdma_iu->sge) allocated above.
	 */
	for (i = 0, j = 0;
	     j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
		rsize = be32_to_cpu(db->len);
		sge = riu->sge;
		k = 0;

		while (rsize > 0 && tsize > 0) {
			sge->addr = dma_addr;
			sge->lkey = ch->sport->sdev->mr->lkey;

			if (rsize >= dma_len) {
				sge->length =
					(tsize < dma_len) ? tsize : dma_len;
				tsize -= dma_len;
				rsize -= dma_len;

				if (tsize > 0) {
					++j;
					if (j < count) {
						dma_len = sg_dma_len(&scat[j]);
						dma_addr =
						    sg_dma_address(&scat[j]);
					}
				}
			} else {
				sge->length = (tsize < rsize) ? tsize : rsize;
				tsize -= rsize;
				dma_len -= rsize;
				dma_addr += rsize;
				rsize = 0;
			}

			++k;
			if (k == riu->sge_cnt && rsize > 0) {
				++riu;
				sge = riu->sge;
				k = 0;
			} else if (rsize > 0)
				++sge;
		}
	}

	return 0;

free_mem:
	/*
	 * Free the ib_sge arrays allocated above; only entries
	 * 0 .. n_rdma - 1 were allocated successfully, hence the
	 * predecrement.
	 */
	while (ioctx->n_rdma)
		kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge);

	kfree(ioctx->rdma_ius);
	/* Clear the pointer so that a later retry does not reuse freed memory. */
	ioctx->rdma_ius = NULL;
	ioctx->n_rdma_ius = 0;

	dma_unmap_sg(ch->sport->sdev->device->dma_device,
		     scat, scst_cmd_get_sg_cnt(scmnd),
		     scst_to_tgt_dma_dir(dir));

	return -ENOMEM;
}

static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
			      scst_data_direction dir)
{
	struct ib_send_wr wr;
	struct ib_send_wr *bad_wr;
	struct rdma_iu *riu;
	int i;
	int ret = 0;

	riu = ioctx->rdma_ius;
	memset(&wr, 0, sizeof wr);

	for (i = 0; i < ioctx->n_rdma; ++i, ++riu) {
		wr.opcode = (dir == SCST_DATA_READ) ?
		    IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
		wr.next = NULL;
		wr.wr_id = ioctx->index;
		wr.wr.rdma.remote_addr = riu->raddr;
		wr.wr.rdma.rkey = riu->rkey;
		wr.num_sge = riu->sge_cnt;
		wr.sg_list = riu->sge;

		/* Only request a completion event for the last RDMA wr. */
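		/*
		 * Note: only the RDMA READ direction (SCST_DATA_WRITE) is
		 * signaled here. RDMA WRITEs (SCST_DATA_READ) presumably need
		 * no completion of their own because send queue work requests
		 * on an RC queue pair complete in order, so the IB_WC_SEND
		 * for the subsequent SRP response implies that the earlier
		 * RDMA WRITEs have finished.
		 */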
		if (i == (ioctx->n_rdma - 1) && dir == SCST_DATA_WRITE)
			wr.send_flags = IB_SEND_SIGNALED;

		ret = ib_post_send(ch->qp, &wr, &bad_wr);
		if (ret)
			break;
	}

	return ret;
}

/*
 * Start data reception. Must not block.
 */
static int srpt_xfer_data(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
			  struct scst_cmd *scmnd)
{
	int ret;

	ret = srpt_map_sg_to_ib_sge(ch, ioctx, scmnd);
	if (ret) {
		printk(KERN_ERR PFX "%s[%d] ret=%d\n", __func__, __LINE__, ret);
		ret = SCST_TGT_RES_QUEUE_FULL;
		goto out;
	}

	ret = srpt_perform_rdmas(ch, ioctx, scst_cmd_get_data_direction(scmnd));
	if (ret) {
		printk(KERN_ERR PFX "%s[%d] ret=%d\n", __func__, __LINE__, ret);
		if (ret == -EAGAIN || ret == -ENOMEM)
			ret = SCST_TGT_RES_QUEUE_FULL;
		else
			ret = SCST_TGT_RES_FATAL_ERROR;
		goto out;
	}

	ret = SCST_TGT_RES_SUCCESS;

out:
	return ret;
}

/*
 * Called by the SCST core to inform ib_srpt that data reception should start.
 * Must not block.
 */
static int srpt_rdy_to_xfer(struct scst_cmd *scmnd)
{
	struct srpt_rdma_ch *ch;
	struct srpt_ioctx *ioctx;

	ioctx = scst_cmd_get_tgt_priv(scmnd);
	BUG_ON(!ioctx);

	ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
	BUG_ON(!ch);

	if (ch->state == RDMA_CHANNEL_DISCONNECTING)
		return SCST_TGT_RES_FATAL_ERROR;
	else if (ch->state == RDMA_CHANNEL_CONNECTING)
		return SCST_TGT_RES_QUEUE_FULL;

	ioctx->state = SRPT_STATE_NEED_DATA;

	return srpt_xfer_data(ch, ioctx, scmnd);
}

/*
 * Called by the SCST core. Transmits the response buffer and status held in
 * 'scmnd'. Must not block.
 */
static int srpt_xmit_response(struct scst_cmd *scmnd)
{
	struct srpt_rdma_ch *ch;
	struct srpt_ioctx *ioctx;
	struct srp_rsp *srp_rsp;
	u64 tag;
	int ret = SCST_TGT_RES_SUCCESS;
	int dir;
	int status;

	ioctx = scst_cmd_get_tgt_priv(scmnd);
	BUG_ON(!ioctx);

	ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
	BUG_ON(!ch);

	tag = scst_cmd_get_tag(scmnd);

	if (ch->state != RDMA_CHANNEL_LIVE) {
		printk(KERN_ERR PFX
		       "%s: tag= %lld channel in bad state %d\n",
		       __func__, (unsigned long long)tag, ch->state);

		if (ch->state == RDMA_CHANNEL_DISCONNECTING)
			ret = SCST_TGT_RES_FATAL_ERROR;
		else if (ch->state == RDMA_CHANNEL_CONNECTING)
			ret = SCST_TGT_RES_QUEUE_FULL;

		if (unlikely(scst_cmd_aborted(scmnd)))
			goto out_aborted;

		goto out;
	}

	dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
				MAX_MESSAGE_SIZE, DMA_TO_DEVICE);

	srp_rsp = ioctx->buf;

	if (unlikely(scst_cmd_aborted(scmnd))) {
		printk(KERN_ERR PFX
		       "%s: tag= %lld has already been aborted\n",
		       __func__, (unsigned long long)tag);
		goto out_aborted;
	}

	dir = scst_cmd_get_data_direction(scmnd);
	status = scst_cmd_get_status(scmnd) & 0xff;

	srpt_build_cmd_rsp(ch, ioctx, NO_SENSE, NO_ADD_SENSE, tag);

	if (SCST_SENSE_VALID(scst_cmd_get_sense_buffer(scmnd))) {
		srp_rsp->sense_data_len = scst_cmd_get_sense_buffer_len(scmnd);
		if (srp_rsp->sense_data_len >
		    (MAX_MESSAGE_SIZE - sizeof *srp_rsp))
			srp_rsp->sense_data_len =
			    MAX_MESSAGE_SIZE - sizeof *srp_rsp;

		memcpy((u8 *) (srp_rsp + 1), scst_cmd_get_sense_buffer(scmnd),
		       srp_rsp->sense_data_len);

		srp_rsp->sense_data_len = cpu_to_be32(srp_rsp->sense_data_len);
		srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;

		if (!status)
			status = SAM_STAT_CHECK_CONDITION;
	}

	srp_rsp->status = status;

	/* transfer read data if any */
	if (dir == SCST_DATA_READ && scst_cmd_get_resp_data_len(scmnd)) {
		ret = srpt_xfer_data(ch, ioctx, scmnd);
		if (ret != SCST_TGT_RES_SUCCESS) {
			printk(KERN_ERR PFX
			       "%s: tag= %lld xfer_data failed\n",
			       __func__, (unsigned long long)tag);
			goto out;
		}
	}

	ioctx->state = SRPT_STATE_PROCESSED;

	if (srpt_post_send(ch, ioctx,
			   sizeof *srp_rsp +
			   be32_to_cpu(srp_rsp->sense_data_len))) {
		printk(KERN_ERR PFX "%s: ch->state= %d tag= %lld\n",
		       __func__, ch->state,
		       (unsigned long long)tag);
		ret = SCST_TGT_RES_FATAL_ERROR;
	}

out:
	return ret;

out_aborted:
	ret = SCST_TGT_RES_SUCCESS;
	scst_set_delivery_status(scmnd, SCST_CMD_DELIVERY_ABORTED);
	ioctx->state = SRPT_STATE_ABORTED;
	scst_tgt_cmd_done(scmnd, SCST_CONTEXT_SAME);
	goto out;
}

/*
 * Called by the SCST core to inform ib_srpt that a received task management
 * function has been completed. Must not block.
 */
static void srpt_tsk_mgmt_done(struct scst_mgmt_cmd *mcmnd)
{
	struct srpt_rdma_ch *ch;
	struct srpt_mgmt_ioctx *mgmt_ioctx;
	struct srpt_ioctx *ioctx;
	int rsp_len;

	mgmt_ioctx = scst_mgmt_cmd_get_tgt_priv(mcmnd);
	BUG_ON(!mgmt_ioctx);

	ch = mgmt_ioctx->ch;
	BUG_ON(!ch);

	ioctx = mgmt_ioctx->ioctx;
	BUG_ON(!ioctx);

	TRACE_DBG("%s: tsk_mgmt_done for tag= %lld status=%d\n",
		  __func__, (unsigned long long)mgmt_ioctx->tag,
		  scst_mgmt_cmd_get_status(mcmnd));

	rsp_len = srpt_build_tskmgmt_rsp(ch, ioctx,
					 (scst_mgmt_cmd_get_status(mcmnd) ==
					  SCST_MGMT_STATUS_SUCCESS) ?
					 SRP_TSK_MGMT_SUCCESS :
					 SRP_TSK_MGMT_FAILED,
					 mgmt_ioctx->tag);
	srpt_post_send(ch, ioctx, rsp_len);

	scst_mgmt_cmd_set_tgt_priv(mcmnd, NULL);

	kfree(mgmt_ioctx);
}

/*
 * Called by the SCST core to inform ib_srpt that the command 'scmnd' is about
 * to be freed. May be called in IRQ context.
 */
static void srpt_on_free_cmd(struct scst_cmd *scmnd)
{
	struct srpt_rdma_ch *ch;
	struct srpt_ioctx *ioctx;
	unsigned long flags;

	ioctx = scst_cmd_get_tgt_priv(scmnd);
	BUG_ON(!ioctx);

	ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
	BUG_ON(!ch);

	/* Use the irqsave variant since this may be called in IRQ context. */
	spin_lock_irqsave(&ch->spinlock, flags);
	list_del(&ioctx->scmnd_list);
	ch->active_scmnd_cnt--;
	spin_unlock_irqrestore(&ch->spinlock, flags);

	srpt_reset_ioctx(ch, ioctx);
	scst_cmd_set_tgt_priv(scmnd, NULL);
}

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
/* A vanilla 2.6.19 or older kernel without backported OFED kernel headers. */
static void srpt_refresh_port_work(void *ctx)
#else
static void srpt_refresh_port_work(struct work_struct *work)
#endif
{
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
	struct srpt_port *sport = (struct srpt_port *)ctx;
#else
	struct srpt_port *sport = container_of(work, struct srpt_port, work);
#endif

	srpt_refresh_port(sport);
}

/*
 * Called by the SCST core to detect target adapters. Returns the number of
 * detected target adapters.
 */
static int srpt_detect(struct scst_tgt_template *tp)
{
	int device_count;

	TRACE_ENTRY();

	device_count = atomic_read(&srpt_device_count);

	TRACE_EXIT_RES(device_count);

	return device_count;
}

/*
 * Callback function called by the SCST core from scst_unregister() to free up
 * the resources associated with device scst_tgt.
 */
static int srpt_release(struct scst_tgt *scst_tgt)
{
	struct srpt_device *sdev = scst_tgt_get_tgt_priv(scst_tgt);
	struct srpt_rdma_ch *ch, *tmp_ch;

	TRACE_ENTRY();

	BUG_ON(!scst_tgt);
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
	WARN_ON(!sdev);
	if (!sdev)
		return -ENODEV;
#else
	if (WARN_ON(!sdev))
		return -ENODEV;
#endif

	srpt_unregister_procfs_entry(scst_tgt->tgtt);

	spin_lock_irq(&sdev->spinlock);
	list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
		list_del(&ch->list);
		spin_unlock_irq(&sdev->spinlock);
		srpt_release_channel(ch, 1);
		spin_lock_irq(&sdev->spinlock);
	}
	spin_unlock_irq(&sdev->spinlock);

	srpt_unregister_mad_agent(sdev);

	scst_tgt_set_tgt_priv(scst_tgt, NULL);

	TRACE_EXIT();

	return 0;
}

/*
 * Entry point for ib_srpt's kernel thread. This kernel thread is only created
 * when the module parameter 'thread' is not zero (the default is zero).
 * This thread processes the ioctx list srpt_thread.thread_ioctx_list.
 *
 * @pre thread != 0
 */
static int srpt_ioctx_thread(void *arg)
{
	struct srpt_ioctx *ioctx;

	/* Hibernation / freezing of the SRPT kernel thread is not supported. */
	current->flags |= PF_NOFREEZE;

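	/*
	 * The loop below implements the classic manual wait-queue pattern:
	 * add this thread to ioctx_list_waitQ, re-check the condition with
	 * srpt_test_ioctx_list() after setting TASK_INTERRUPTIBLE, and sleep
	 * until more I/O contexts have been queued (presumably by
	 * srpt_schedule_thread(), which wakes up this wait queue).
	 */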
	spin_lock_irq(&srpt_thread.thread_lock);
	while (!kthread_should_stop()) {
		wait_queue_t wait;
		init_waitqueue_entry(&wait, current);

		if (!srpt_test_ioctx_list()) {
			add_wait_queue_exclusive(&ioctx_list_waitQ, &wait);

			for (;;) {
				set_current_state(TASK_INTERRUPTIBLE);
				if (srpt_test_ioctx_list())
					break;
				spin_unlock_irq(&srpt_thread.thread_lock);
				schedule();
				spin_lock_irq(&srpt_thread.thread_lock);
			}
			set_current_state(TASK_RUNNING);
			remove_wait_queue(&ioctx_list_waitQ, &wait);
		}

		while (!list_empty(&srpt_thread.thread_ioctx_list)) {
			ioctx = list_entry(srpt_thread.thread_ioctx_list.next,
					   struct srpt_ioctx, comp_list);

			list_del(&ioctx->comp_list);

			spin_unlock_irq(&srpt_thread.thread_lock);
			switch (ioctx->op) {
			case IB_WC_SEND:
				srpt_handle_send_comp(ioctx->ch, ioctx,
					SCST_CONTEXT_DIRECT);
				break;
			case IB_WC_RDMA_WRITE:
			case IB_WC_RDMA_READ:
				srpt_handle_rdma_comp(ioctx->ch, ioctx);
				break;
			case IB_WC_RECV:
				srpt_handle_new_iu(ioctx->ch, ioctx);
				break;
			default:
				break;
			}
			spin_lock_irq(&srpt_thread.thread_lock);
		}
	}
	spin_unlock_irq(&srpt_thread.thread_lock);

	return 0;
}

/* SCST target template for the SRP target implementation. */
static struct scst_tgt_template srpt_template = {
	.name = DRV_NAME,
	.sg_tablesize = SRPT_DEF_SG_TABLESIZE,
	.xmit_response_atomic = 1,
	.rdy_to_xfer_atomic = 1,
	.no_proc_entry = 0,
	.detect = srpt_detect,
	.release = srpt_release,
	.xmit_response = srpt_xmit_response,
	.rdy_to_xfer = srpt_rdy_to_xfer,
	.on_free_cmd = srpt_on_free_cmd,
	.task_mgmt_fn_done = srpt_tsk_mgmt_done
};
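
/*
 * Note: the xmit_response_atomic and rdy_to_xfer_atomic flags tell the SCST
 * core that srpt_xmit_response() and srpt_rdy_to_xfer() may be invoked in
 * atomic context, which is why those callbacks must not block.
 */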

/*
 * The callback function srpt_release_class_dev() is called whenever a
 * device is removed from the /sys/class/infiniband_srpt device class.
 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
static void srpt_release_class_dev(struct class_device *class_dev)
#else
static void srpt_release_class_dev(struct device *dev)
#endif
{
}

#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
static int srpt_trace_level_show(struct seq_file *seq, void *v)
{
	return scst_proc_log_entry_read(seq, trace_flag, NULL);
}

static ssize_t srpt_proc_trace_level_write(struct file *file,
	const char __user *buf, size_t length, loff_t *off)
{
	return scst_proc_log_entry_write(file, buf, length, &trace_flag,
		DEFAULT_SRPT_TRACE_FLAGS, NULL);
}

static struct scst_proc_data srpt_log_proc_data = {
	SCST_DEF_RW_SEQ_OP(srpt_proc_trace_level_write)
	.show = srpt_trace_level_show,
};
#endif

static struct class_attribute srpt_class_attrs[] = {
	__ATTR_NULL,
};

static struct class srpt_class = {
	.name = "infiniband_srpt",
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
	.release = srpt_release_class_dev,
#else
	.dev_release = srpt_release_class_dev,
#endif
	.class_attrs = srpt_class_attrs,
};

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
static ssize_t show_login_info(struct class_device *class_dev, char *buf)
#else
static ssize_t show_login_info(struct device *dev,
			       struct device_attribute *attr, char *buf)
#endif
{
	struct srpt_device *sdev =
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
		container_of(class_dev, struct srpt_device, class_dev);
#else
		container_of(dev, struct srpt_device, dev);
#endif
	struct srpt_port *sport;
	int i;
	int len = 0;

	for (i = 0; i < sdev->device->phys_port_cnt; i++) {
		sport = &sdev->port[i];

		len += sprintf(buf + len,
			       "tid_ext=%016llx,ioc_guid=%016llx,pkey=ffff,"
			       "dgid=%04x%04x%04x%04x%04x%04x%04x%04x,"
			       "service_id=%016llx\n",
			       (unsigned long long) mellanox_ioc_guid,
			       (unsigned long long) mellanox_ioc_guid,
			       be16_to_cpu(((__be16 *) sport->gid.raw)[0]),
			       be16_to_cpu(((__be16 *) sport->gid.raw)[1]),
			       be16_to_cpu(((__be16 *) sport->gid.raw)[2]),
			       be16_to_cpu(((__be16 *) sport->gid.raw)[3]),
			       be16_to_cpu(((__be16 *) sport->gid.raw)[4]),
			       be16_to_cpu(((__be16 *) sport->gid.raw)[5]),
			       be16_to_cpu(((__be16 *) sport->gid.raw)[6]),
			       be16_to_cpu(((__be16 *) sport->gid.raw)[7]),
			       (unsigned long long) mellanox_ioc_guid);
	}

	return len;
}

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
static CLASS_DEVICE_ATTR(login_info, S_IRUGO, show_login_info, NULL);
#else
static DEVICE_ATTR(login_info, S_IRUGO, show_login_info, NULL);
#endif

/*
 * Callback function, called by the InfiniBand core either when an InfiniBand
 * device is added or, during the ib_register_client() call, once for each
 * InfiniBand device that was already registered.
 */
static void srpt_add_one(struct ib_device *device)
{
	struct srpt_device *sdev;
	struct srpt_port *sport;
	struct ib_srq_init_attr srq_attr;
	int i;

	TRACE_ENTRY();

	sdev = kzalloc(sizeof *sdev, GFP_KERNEL);
	if (!sdev)
		return;

	sdev->device = device;

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
	sdev->class_dev.class = &srpt_class;
	sdev->class_dev.dev = device->dma_device;
	snprintf(sdev->class_dev.class_id, BUS_ID_SIZE,
		 "srpt-%s", device->name);
#else
	sdev->dev.class = &srpt_class;
	sdev->dev.parent = device->dma_device;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30)
	snprintf(sdev->dev.bus_id, BUS_ID_SIZE, "srpt-%s", device->name);
#else
	snprintf(sdev->init_name, sizeof(sdev->init_name),
		 "srpt-%s", device->name);
	sdev->dev.init_name = sdev->init_name;
#endif
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
	if (class_device_register(&sdev->class_dev))
		goto free_dev;
	if (class_device_create_file(&sdev->class_dev,
				     &class_device_attr_login_info))
		goto err_dev;
#else
	if (device_register(&sdev->dev))
		goto free_dev;
	if (device_create_file(&sdev->dev, &dev_attr_login_info))
		goto err_dev;
#endif

	if (ib_query_device(device, &sdev->dev_attr))
		goto err_dev;

	sdev->pd = ib_alloc_pd(device);
	if (IS_ERR(sdev->pd))
		goto err_dev;

	sdev->mr = ib_get_dma_mr(sdev->pd, IB_ACCESS_LOCAL_WRITE);
	if (IS_ERR(sdev->mr))
		goto err_pd;

	srq_attr.event_handler = srpt_srq_event;
	srq_attr.srq_context = (void *)sdev;
	srq_attr.attr.max_wr = min(SRPT_SRQ_SIZE, sdev->dev_attr.max_srq_wr);
	srq_attr.attr.max_sge = 1;
	srq_attr.attr.srq_limit = 0;

	sdev->srq = ib_create_srq(sdev->pd, &srq_attr);
	if (IS_ERR(sdev->srq))
		goto err_mr;

	TRACE_DBG("%s: create SRQ #wr= %d max_allow=%d dev= %s",
	       __func__, srq_attr.attr.max_wr,
	      sdev->dev_attr.max_srq_wr, device->name);

	if (!mellanox_ioc_guid)
		mellanox_ioc_guid = be64_to_cpu(device->node_guid);

	sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev);
	if (IS_ERR(sdev->cm_id))
		goto err_srq;

	/* print out target login information */
	TRACE_DBG("Target login info: id_ext=%016llx,"
		  "ioc_guid=%016llx,pkey=ffff,service_id=%016llx",
		  (unsigned long long) mellanox_ioc_guid,
		  (unsigned long long) mellanox_ioc_guid,
		  (unsigned long long) mellanox_ioc_guid);

	/*
	 * We do not have a consistent service_id (i.e. also id_ext of
	 * target_id) to identify this target. We currently use the GUID of
	 * the first HCA in the system as service_id; therefore, the target_id
	 * will change if this HCA goes bad and is replaced by a different HCA.
	 */
	if (ib_cm_listen(sdev->cm_id, cpu_to_be64(mellanox_ioc_guid), 0, NULL))
		goto err_cm;

	INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device,
			      srpt_event_handler);
	if (ib_register_event_handler(&sdev->event_handler))
		goto err_cm;

	if (srpt_alloc_ioctx_ring(sdev))
		goto err_event;

	INIT_LIST_HEAD(&sdev->rch_list);
	spin_lock_init(&sdev->spinlock);

	for (i = 0; i < SRPT_SRQ_SIZE; ++i)
		srpt_post_recv(sdev, sdev->ioctx_ring[i]);

	ib_set_client_data(device, &srpt_client, sdev);

	sdev->scst_tgt = scst_register(&srpt_template, NULL);
	if (!sdev->scst_tgt) {
		printk(KERN_ERR PFX "SCST registration failed for %s.\n",
			sdev->device->name);
		goto err_ring;
	}

	scst_tgt_set_tgt_priv(sdev->scst_tgt, sdev);

	for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
		sport = &sdev->port[i - 1];
		sport->sdev = sdev;
		sport->port = i;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
		/*
		 * A vanilla 2.6.19 or older kernel without backported OFED
		 * kernel headers.
		 */
		INIT_WORK(&sport->work, srpt_refresh_port_work, sport);
#else
		INIT_WORK(&sport->work, srpt_refresh_port_work);
#endif
		if (srpt_refresh_port(sport)) {
			printk(KERN_ERR PFX "MAD registration failed"
			       " for %s-%d.\n", sdev->device->name, i);
			goto err_refresh_port;
		}
	}

	atomic_inc(&srpt_device_count);

	TRACE_EXIT();

	return;

err_refresh_port:
	scst_unregister(sdev->scst_tgt);
err_ring:
	ib_set_client_data(device, &srpt_client, NULL);
	srpt_free_ioctx_ring(sdev);
err_event:
	ib_unregister_event_handler(&sdev->event_handler);
err_cm:
	ib_destroy_cm_id(sdev->cm_id);
err_srq:
	ib_destroy_srq(sdev->srq);
err_mr:
	ib_dereg_mr(sdev->mr);
err_pd:
	ib_dealloc_pd(sdev->pd);
err_dev:
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
	class_device_unregister(&sdev->class_dev);
#else
	device_unregister(&sdev->dev);
#endif
free_dev:
	kfree(sdev);

	TRACE_EXIT();
}

/*
 * Callback function, called by the InfiniBand core either when an InfiniBand
 * device is removed or, during the ib_unregister_client() call, once for each
 * InfiniBand device that is still registered.
 */
static void srpt_remove_one(struct ib_device *device)
{
	int i;
	struct srpt_device *sdev;

	TRACE_ENTRY();

	sdev = ib_get_client_data(device, &srpt_client);
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
	WARN_ON(!sdev);
	if (!sdev)
		return;
#else
	if (WARN_ON(!sdev))
		return;
#endif

	/*
	 * Cancel the work if it is queued. Wait until srpt_refresh_port_work()
	 * has finished if it is running.
	 */
	for (i = 0; i < sdev->device->phys_port_cnt; i++)
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 22)
		cancel_work_sync(&sdev->port[i].work);
#else
		/*
		 * cancel_work_sync() was introduced in kernel 2.6.22. Older
		 * kernels do not have a facility to cancel scheduled work.
		 */
		printk(KERN_ERR PFX
		       "your kernel does not provide cancel_work_sync().\n");
#endif

	scst_unregister(sdev->scst_tgt);
	sdev->scst_tgt = NULL;

	ib_unregister_event_handler(&sdev->event_handler);
	ib_destroy_cm_id(sdev->cm_id);
	ib_destroy_srq(sdev->srq);
	ib_dereg_mr(sdev->mr);
	ib_dealloc_pd(sdev->pd);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
	class_device_unregister(&sdev->class_dev);
#else
	device_unregister(&sdev->dev);
#endif

	srpt_free_ioctx_ring(sdev);
	kfree(sdev);

	TRACE_EXIT();
}

/**
 * Create procfs entries for srpt. Currently the only procfs entry created
 * by this function is the "trace_level" entry.
 */
static int srpt_register_procfs_entry(struct scst_tgt_template *tgt)
{
	int res = 0;
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
	struct proc_dir_entry *p, *root;

	root = scst_proc_get_tgt_root(tgt);
	WARN_ON(!root);
	if (root) {
		/*
		 * Fill in the scst_proc_data::data pointer, which is used in
		 * a printk(KERN_INFO ...) statement in
		 * scst_proc_log_entry_write() in scst_proc.c.
		 */
		srpt_log_proc_data.data = (char *)tgt->name;
		p = scst_create_proc_entry(root, SRPT_PROC_TRACE_LEVEL_NAME,
					   &srpt_log_proc_data);
		if (!p)
			res = -ENOMEM;
	} else
		res = -ENOMEM;

#endif
	return res;
}

static void srpt_unregister_procfs_entry(struct scst_tgt_template *tgt)
{
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
	struct proc_dir_entry *root;

	root = scst_proc_get_tgt_root(tgt);
	WARN_ON(!root);
	if (root)
		remove_proc_entry(SRPT_PROC_TRACE_LEVEL_NAME, root);
#endif
}

/*
 * Module initialization.
 *
 * Note: since ib_register_client() registers callback functions, and since at
 * least one of these callback functions (srpt_add_one()) calls SCST functions,
 * the SCST target template must be registered before ib_register_client() is
 * called.
 */
static int __init srpt_init_module(void)
{
	int ret;

	ret = class_register(&srpt_class);
	if (ret) {
		printk(KERN_ERR PFX "couldn't register class ib_srpt\n");
		goto out;
	}

	ret = scst_register_target_template(&srpt_template);
	if (ret < 0) {
		printk(KERN_ERR PFX "couldn't register with scst\n");
		ret = -ENODEV;
		goto out_unregister_class;
	}

	ret = srpt_register_procfs_entry(&srpt_template);
	if (ret) {
		printk(KERN_ERR PFX "couldn't register procfs entry\n");
		goto out_unregister_target;
	}

	ret = ib_register_client(&srpt_client);
	if (ret) {
		printk(KERN_ERR PFX "couldn't register IB client\n");
		goto out_unregister_target;
	}

	if (thread) {
		spin_lock_init(&srpt_thread.thread_lock);
		INIT_LIST_HEAD(&srpt_thread.thread_ioctx_list);
		srpt_thread.thread = kthread_run(srpt_ioctx_thread,
						 NULL, "srpt_thread");
		if (IS_ERR(srpt_thread.thread)) {
			srpt_thread.thread = NULL;
			thread = 0;
		}
	}

	return 0;

out_unregister_target:
	/*
	 * Note: the procfs entry is unregistered in srpt_release(), which is
	 * called by scst_unregister_target_template().
	 */
	scst_unregister_target_template(&srpt_template);
out_unregister_class:
	class_unregister(&srpt_class);
out:
	return ret;
}

static void __exit srpt_cleanup_module(void)
{
	TRACE_ENTRY();

	if (srpt_thread.thread)
		kthread_stop(srpt_thread.thread);
	ib_unregister_client(&srpt_client);
	scst_unregister_target_template(&srpt_template);
	class_unregister(&srpt_class);

	TRACE_EXIT();
}

module_init(srpt_init_module);
module_exit(srpt_cleanup_module);