1 /*
2  * Copyright (c) 2006 - 2009 Mellanox Technology Inc.  All rights reserved.
3  * Copyright (C) 2008 Vladislav Bolkhovitin <vst@vlnb.net>
4  * Copyright (C) 2008 - 2009 Bart Van Assche <bart.vanassche@gmail.com>
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  *
34  */
35
36 #include <linux/module.h>
37 #include <linux/init.h>
38 #include <linux/slab.h>
39 #include <linux/err.h>
40 #include <linux/ctype.h>
41 #include <linux/string.h>
42 #include <linux/kthread.h>
43 #include <asm/atomic.h>
44 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
45 #include <linux/proc_fs.h>
46 #include <linux/seq_file.h>
47 #endif
48 #include "ib_srpt.h"
49 #include "scst_debug.h"
50
51 /* Name of this kernel module. */
52 #define DRV_NAME                "ib_srpt"
53 /* Prefix for printk() kernel messages. */
54 #define PFX                     DRV_NAME ": "
55 #define DRV_VERSION             "1.0.1"
56 #define DRV_RELDATE             "July 10, 2008"
57 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
58 /* Flags to be used in SCST debug tracing statements. */
59 #define DEFAULT_SRPT_TRACE_FLAGS (TRACE_OUT_OF_MEM | TRACE_MINOR \
60                                   | TRACE_MGMT | TRACE_SPECIAL)
61 /* Name of the entry that will be created under /proc/scsi_tgt/ib_srpt. */
62 #define SRPT_PROC_TRACE_LEVEL_NAME      "trace_level"
63 #endif
64
65 #define MELLANOX_SRPT_ID_STRING "Mellanox OFED SRP target"
66
67 MODULE_AUTHOR("Vu Pham");
68 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol target "
69                    "v" DRV_VERSION " (" DRV_RELDATE ")");
70 MODULE_LICENSE("Dual BSD/GPL");
71
72 struct srpt_thread {
73         /* Protects thread_ioctx_list. */
74         spinlock_t thread_lock;
75         /* I/O contexts to be processed by the kernel thread. */
76         struct list_head thread_ioctx_list;
77         /* SRPT kernel thread. */
78         struct task_struct *thread;
79 };
80
81 /*
82  * Global Variables
83  */
84
85 static u64 mellanox_ioc_guid;
86 /* Number of srpt_device structures. */
87 static atomic_t srpt_device_count;
88 static int thread;
89 static struct srpt_thread srpt_thread;
90 static DECLARE_WAIT_QUEUE_HEAD(ioctx_list_waitQ);
91 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
92 static unsigned long trace_flag = DEFAULT_SRPT_TRACE_FLAGS;
93 module_param(trace_flag, long, 0644);
94 MODULE_PARM_DESC(trace_flag,
95                  "Trace flags for the ib_srpt kernel module.");
96 #endif
97
98 module_param(thread, int, 0444);
99 MODULE_PARM_DESC(thread,
100                  "Execute ioctx in thread context. Default is 0, i.e. soft IRQ "
101                  "context where possible.");
102
103 static void srpt_add_one(struct ib_device *device);
104 static void srpt_remove_one(struct ib_device *device);
105 static void srpt_unregister_mad_agent(struct srpt_device *sdev);
106 static void srpt_unregister_procfs_entry(struct scst_tgt_template *tgt);
107
108 static struct ib_client srpt_client = {
109         .name = DRV_NAME,
110         .add = srpt_add_one,
111         .remove = srpt_remove_one
112 };
113
114 /**
115  * Atomically test and set the channel state.
116  * @ch: RDMA channel.
117  * @old: channel state to compare with.
118  * @new: state to change the channel state to if the current state matches the
119  *       argument 'old'.
120  *
121  * Returns true if the channel state matched old upon entry of this function,
122  * and false otherwise.
123  */
124 static bool srpt_test_and_set_channel_state(struct srpt_rdma_ch *ch,
125                                             enum rdma_ch_state old,
126                                             enum rdma_ch_state new)
127 {
128         unsigned long flags;
129         enum rdma_ch_state cur;
130
131         spin_lock_irqsave(&ch->spinlock, flags);
132         cur = ch->state;
133         if (cur == old)
134                 ch->state = new;
135         spin_unlock_irqrestore(&ch->spinlock, flags);
136
137         return cur == old;
138 }
139
140 /*
141  * Callback function called by the InfiniBand core when an asynchronous IB
142  * event occurs. This callback may occur in interrupt context. See also
143  * section 11.5.2, Set Asynchronous Event Handler in the InfiniBand
144  * Architecture Specification.
145  */
146 static void srpt_event_handler(struct ib_event_handler *handler,
147                                struct ib_event *event)
148 {
149         struct srpt_device *sdev;
150         struct srpt_port *sport;
151
152         sdev = ib_get_client_data(event->device, &srpt_client);
153         if (!sdev || sdev->device != event->device)
154                 return;
155
156         TRACE_DBG("ASYNC event= %d on device= %s",
157                   event->event, sdev->device->name);
158
159         switch (event->event) {
160         case IB_EVENT_PORT_ERR:
161                 if (event->element.port_num <= sdev->device->phys_port_cnt) {
162                         sport = &sdev->port[event->element.port_num - 1];
163                         sport->lid = 0;
164                         sport->sm_lid = 0;
165                 }
166                 break;
167         case IB_EVENT_PORT_ACTIVE:
168         case IB_EVENT_LID_CHANGE:
169         case IB_EVENT_PKEY_CHANGE:
170         case IB_EVENT_SM_CHANGE:
171         case IB_EVENT_CLIENT_REREGISTER:
172                 /*
173                  * Refresh port data asynchronously. Note: it is safe to call
174                  * schedule_work() even if &sport->work is already on the
175                  * global workqueue because schedule_work() tests for the
176                  * work_pending() condition before adding &sport->work to the
177                  * global work queue.
178                  */
179                 if (event->element.port_num <= sdev->device->phys_port_cnt) {
180                         sport = &sdev->port[event->element.port_num - 1];
181                         if (!sport->lid && !sport->sm_lid)
182                                 schedule_work(&sport->work);
183                 }
184                 break;
185         default:
186                 break;
187         }
188
189 }
190
191 /*
192  * Callback function called by the InfiniBand core for SRQ (shared receive
193  * queue) events.
194  */
195 static void srpt_srq_event(struct ib_event *event, void *ctx)
196 {
197         TRACE_DBG("SRQ event %d", event->event);
198 }
199
200 /*
201  * Callback function called by the InfiniBand core for QP (queue pair) events.
202  */
203 static void srpt_qp_event(struct ib_event *event, void *ctx)
204 {
205         struct srpt_rdma_ch *ch = ctx;
206
207         TRACE_DBG("QP event %d on cm_id=%p sess_name=%s state=%d",
208                   event->event, ch->cm_id, ch->sess_name, ch->state);
209
210         switch (event->event) {
211         case IB_EVENT_COMM_EST:
212 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20) || defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
213                 ib_cm_notify(ch->cm_id, event->event);
214 #else
215                 /* Vanilla 2.6.19 kernel (or before) without OFED. */
216                 printk(KERN_ERR PFX "ib_cm_notify() is not available on"
217                         " vanilla kernels older than 2.6.20.\n");
218 #endif
219                 break;
220         case IB_EVENT_QP_LAST_WQE_REACHED:
221                 if (srpt_test_and_set_channel_state(ch, RDMA_CHANNEL_LIVE,
222                                         RDMA_CHANNEL_DISCONNECTING)) {
223                         TRACE_DBG("%s", "Disconnecting channel.");
224                         ib_send_cm_dreq(ch->cm_id, NULL, 0);
225                 }
226                 break;
227         default:
228                 break;
229         }
230 }
231
232 /*
233  * Helper function for filling in an InfiniBand IOUnitInfo structure: copies
234  * the lowest four bits of 'value' into element 'slot' of the array of four-bit
235  * elements 'c_list' (the controller list). The index 'slot' is one-based.
236  *
237  * @pre 1 <= slot && 0 <= value && value < 16
238  */
239 static void srpt_set_ioc(u8 *c_list, u32 slot, u8 value)
240 {
241         u16 id;
242         u8 tmp;
243
244         id = (slot - 1) / 2;
245         if (slot & 0x1) {
246                 tmp = c_list[id] & 0xf;
247                 c_list[id] = (value << 4) | tmp;
248         } else {
249                 tmp = c_list[id] & 0xf0;
250                 c_list[id] = (value & 0xf) | tmp;
251         }
252 }
253
254 /*
255  * Write InfiniBand ClassPortInfo to mad. See also section 16.3.3.1
256  * ClassPortInfo in the InfiniBand Architecture Specification.
257  */
258 static void srpt_get_class_port_info(struct ib_dm_mad *mad)
259 {
260         struct ib_class_port_info *cif;
261
262         cif = (struct ib_class_port_info *)mad->data;
263         memset(cif, 0, sizeof *cif);
264         cif->base_version = 1;
265         cif->class_version = 1;
266         cif->resp_time_value = 20;
267
268         mad->mad_hdr.status = 0;
269 }
270
271 /*
272  * Write IOUnitInfo to mad. See also section 16.3.3.3 IOUnitInfo in the
273  * InfiniBand Architecture Specification. See also section B.7,
274  * table B.6 in the T10 SRP r16a document.
275  */
276 static void srpt_get_iou(struct ib_dm_mad *mad)
277 {
278         struct ib_dm_iou_info *ioui;
279         u8 slot;
280         int i;
281
282         ioui = (struct ib_dm_iou_info *)mad->data;
283         ioui->change_id = 1;
284         ioui->max_controllers = 16;
285
286         /* set present for slot 1 and empty for the rest */
287         srpt_set_ioc(ioui->controller_list, 1, 1);
288         for (i = 1, slot = 2; i < 16; i++, slot++)
289                 srpt_set_ioc(ioui->controller_list, slot, 0);
290
291         mad->mad_hdr.status = 0;
292 }
293
294 /*
295  * Write IOControllerprofile to mad for I/O controller (sdev, slot). See also
296  * section 16.3.3.4 IOControllerProfile in the InfiniBand Architecture
297  * Specification. See also section B.7, table B.7 in the T10 SRP r16a
298  * document.
299  */
300 static void srpt_get_ioc(struct srpt_device *sdev, u32 slot,
301                          struct ib_dm_mad *mad)
302 {
303         struct ib_dm_ioc_profile *iocp;
304
305         iocp = (struct ib_dm_ioc_profile *)mad->data;
306
307         if (!slot || slot > 16) {
308                 mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
309                 return;
310         }
311
312         if (slot > 2) {
313                 mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
314                 return;
315         }
316
317         memset(iocp, 0, sizeof *iocp);
318         strcpy(iocp->id_string, MELLANOX_SRPT_ID_STRING);
319         iocp->guid = cpu_to_be64(mellanox_ioc_guid);
320         iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
321         iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id);
322         iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver);
323         iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
324         iocp->subsys_device_id = 0x0;
325         iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
326         iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
327         iocp->protocol = cpu_to_be16(SRP_PROTOCOL);
328         iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION);
329         iocp->send_queue_depth = cpu_to_be16(SRPT_SRQ_SIZE);
330         iocp->rdma_read_depth = 4;
331         iocp->send_size = cpu_to_be32(MAX_MESSAGE_SIZE);
332         iocp->rdma_size = cpu_to_be32(MAX_RDMA_SIZE);
333         iocp->num_svc_entries = 1;
334         iocp->op_cap_mask = SRP_SEND_TO_IOC | SRP_SEND_FROM_IOC |
335             SRP_RDMA_READ_FROM_IOC | SRP_RDMA_WRITE_FROM_IOC;
336
337         mad->mad_hdr.status = 0;
338 }
339
340 /*
341  * Device management: write ServiceEntries to mad for the given slot. See also
342  * section 16.3.3.5 ServiceEntries in the InfiniBand Architecture
343  * Specification. See also section B.7, table B.8 in the T10 SRP r16a document.
344  */
345 static void srpt_get_svc_entries(u16 slot, u8 hi, u8 lo, struct ib_dm_mad *mad)
346 {
347         struct ib_dm_svc_entries *svc_entries;
348
349         if (!slot || slot > 16) {
350                 mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
351                 return;
352         }
353
354         if (slot > 2 || lo > hi || hi > 1) {
355                 mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
356                 return;
357         }
358
359         svc_entries = (struct ib_dm_svc_entries *)mad->data;
360         memset(svc_entries, 0, sizeof *svc_entries);
361         svc_entries->service_entries[0].id = cpu_to_be64(mellanox_ioc_guid);
362         snprintf(svc_entries->service_entries[0].name,
363                  sizeof(svc_entries->service_entries[0].name),
364                  "%s%016llx",
365                  SRP_SERVICE_NAME_PREFIX,
366                  (unsigned long long)mellanox_ioc_guid);
367
368         mad->mad_hdr.status = 0;
369 }
370
371 /*
372  * Actual processing of MAD *rq_mad, received through source port *sp
373  * (MAD = InfiniBand management datagram). The response to be sent back is
374  * written to *rsp_mad.
375  */
376 static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
377                                  struct ib_dm_mad *rsp_mad)
378 {
379         u16 attr_id;
380         u32 slot;
381         u8 hi, lo;
382
383         attr_id = be16_to_cpu(rq_mad->mad_hdr.attr_id);
384         switch (attr_id) {
385         case DM_ATTR_CLASS_PORT_INFO:
386                 srpt_get_class_port_info(rsp_mad);
387                 break;
388         case DM_ATTR_IOU_INFO:
389                 srpt_get_iou(rsp_mad);
390                 break;
391         case DM_ATTR_IOC_PROFILE:
392                 slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
393                 srpt_get_ioc(sp->sdev, slot, rsp_mad);
394                 break;
395         case DM_ATTR_SVC_ENTRIES:
396                 slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
397                 hi = (u8) ((slot >> 8) & 0xff);
398                 lo = (u8) (slot & 0xff);
399                 slot = (u16) ((slot >> 16) & 0xffff);
400                 srpt_get_svc_entries(slot, hi, lo, rsp_mad);
401                 break;
402         default:
403                 rsp_mad->mad_hdr.status =
404                     cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
405                 break;
406         }
407 }
408
409 /*
410  * Callback function that is called by the InfiniBand core after transmission of
411  * a MAD. (MAD = management datagram; AH = address handle.)
412  */
413 static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
414                                   struct ib_mad_send_wc *mad_wc)
415 {
416         ib_destroy_ah(mad_wc->send_buf->ah);
417         ib_free_send_mad(mad_wc->send_buf);
418 }
419
420 /*
421  * Callback function that is called by the InfiniBand core after reception of
422  * a MAD (management datagram).
423  */
424 static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
425                                   struct ib_mad_recv_wc *mad_wc)
426 {
427         struct srpt_port *sport = (struct srpt_port *)mad_agent->context;
428         struct ib_ah *ah;
429         struct ib_mad_send_buf *rsp;
430         struct ib_dm_mad *dm_mad;
431
432         if (!mad_wc || !mad_wc->recv_buf.mad)
433                 return;
434
435         ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc,
436                                   mad_wc->recv_buf.grh, mad_agent->port_num);
437         if (IS_ERR(ah))
438                 goto err;
439
440         BUILD_BUG_ON(offsetof(struct ib_dm_mad, data) != IB_MGMT_DEVICE_HDR);
441
442         rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp,
443                                  mad_wc->wc->pkey_index, 0,
444                                  IB_MGMT_DEVICE_HDR, IB_MGMT_DEVICE_DATA,
445                                  GFP_KERNEL);
446         if (IS_ERR(rsp))
447                 goto err_rsp;
448
449         rsp->ah = ah;
450
451         dm_mad = rsp->mad;
452         memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof *dm_mad);
453         dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
454         dm_mad->mad_hdr.status = 0;
455
456         switch (mad_wc->recv_buf.mad->mad_hdr.method) {
457         case IB_MGMT_METHOD_GET:
458                 srpt_mgmt_method_get(sport, mad_wc->recv_buf.mad, dm_mad);
459                 break;
460         case IB_MGMT_METHOD_SET:
461                 dm_mad->mad_hdr.status =
462                     cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
463                 break;
464         default:
465                 dm_mad->mad_hdr.status =
466                     cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD);
467                 break;
468         }
469
470         if (!ib_post_send_mad(rsp, NULL)) {
471                 ib_free_recv_mad(mad_wc);
472                 /* will destroy_ah & free_send_mad in send completion */
473                 return;
474         }
475
476         ib_free_send_mad(rsp);
477
478 err_rsp:
479         ib_destroy_ah(ah);
480 err:
481         ib_free_recv_mad(mad_wc);
482 }
483
484 /*
485  * Enable InfiniBand management datagram processing, update the cached sm_lid,
486  * lid and gid values, and register a callback function for processing MADs
487  * on the specified port. It is safe to call this function more than once for
488  * the same port.
489  */
490 static int srpt_refresh_port(struct srpt_port *sport)
491 {
492         struct ib_mad_reg_req reg_req;
493         struct ib_port_modify port_modify;
494         struct ib_port_attr port_attr;
495         int ret;
496
497         memset(&port_modify, 0, sizeof port_modify);
498         port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
499         port_modify.clr_port_cap_mask = 0;
500
501         ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
502         if (ret)
503                 goto err_mod_port;
504
505         ret = ib_query_port(sport->sdev->device, sport->port, &port_attr);
506         if (ret)
507                 goto err_query_port;
508
509         sport->sm_lid = port_attr.sm_lid;
510         sport->lid = port_attr.lid;
511
512         ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid);
513         if (ret)
514                 goto err_query_port;
515
516         if (!sport->mad_agent) {
517                 memset(&reg_req, 0, sizeof reg_req);
518                 reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
519                 reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION;
520                 set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
521                 set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);
522
523                 sport->mad_agent = ib_register_mad_agent(sport->sdev->device,
524                                                          sport->port,
525                                                          IB_QPT_GSI,
526                                                          &reg_req, 0,
527                                                          srpt_mad_send_handler,
528                                                          srpt_mad_recv_handler,
529                                                          sport);
530                 if (IS_ERR(sport->mad_agent)) {
531                         ret = PTR_ERR(sport->mad_agent);
532                         sport->mad_agent = NULL;
533                         goto err_query_port;
534                 }
535         }
536
537         return 0;
538
539 err_query_port:
540
541         port_modify.set_port_cap_mask = 0;
542         port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
543         ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
544
545 err_mod_port:
546
547         return ret;
548 }
549
550 /*
551  * Unregister the callback function for processing MADs and disable MAD
552  * processing for all ports of the specified device. It is safe to call this
553  * function more than once for the same device.
554  */
555 static void srpt_unregister_mad_agent(struct srpt_device *sdev)
556 {
557         struct ib_port_modify port_modify = {
558                 .clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP,
559         };
560         struct srpt_port *sport;
561         int i;
562
563         for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
564                 sport = &sdev->port[i - 1];
565                 WARN_ON(sport->port != i);
566                 if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0)
567                         printk(KERN_ERR PFX "disabling MAD processing"
568                                " failed.\n");
569                 if (sport->mad_agent) {
570                         ib_unregister_mad_agent(sport->mad_agent);
571                         sport->mad_agent = NULL;
572                 }
573         }
574 }
575
576 /*
577  * Allocate and initialize an SRPT I/O context structure.
578  */
579 static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev)
580 {
581         struct srpt_ioctx *ioctx;
582
583         ioctx = kmalloc(sizeof *ioctx, GFP_KERNEL);
584         if (!ioctx)
585                 goto out;
586
587         ioctx->buf = kzalloc(MAX_MESSAGE_SIZE, GFP_KERNEL);
588         if (!ioctx->buf)
589                 goto out_free_ioctx;
590
591         ioctx->dma = dma_map_single(sdev->device->dma_device, ioctx->buf,
592                                     MAX_MESSAGE_SIZE, DMA_BIDIRECTIONAL);
593 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)
594         if (dma_mapping_error(sdev->device->dma_device, ioctx->dma))
595 #else
596         if (dma_mapping_error(ioctx->dma))
597 #endif
598                 goto out_free_buf;
599
600         return ioctx;
601
602 out_free_buf:
603         kfree(ioctx->buf);
604 out_free_ioctx:
605         kfree(ioctx);
606 out:
607         return NULL;
608 }
609
610 /*
611  * Deallocate an SRPT I/O context structure.
612  */
613 static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
614 {
615         if (!ioctx)
616                 return;
617
618         dma_unmap_single(sdev->device->dma_device, ioctx->dma,
619                          MAX_MESSAGE_SIZE, DMA_BIDIRECTIONAL);
620         kfree(ioctx->buf);
621         kfree(ioctx);
622 }
623
624 /*
625  * Associate a ring of SRPT I/O context structures with the specified device.
626  */
627 static int srpt_alloc_ioctx_ring(struct srpt_device *sdev)
628 {
629         int i;
630
631         for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
632                 sdev->ioctx_ring[i] = srpt_alloc_ioctx(sdev);
633
634                 if (!sdev->ioctx_ring[i])
635                         goto err;
636
637                 sdev->ioctx_ring[i]->index = i;
638         }
639
640         return 0;
641
642 err:
643         while (--i >= 0) {
644                 srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
645                 sdev->ioctx_ring[i] = NULL;
646         }
647         return -ENOMEM;
648 }
649
650 /* Free the ring of SRPT I/O context structures. */
651 static void srpt_free_ioctx_ring(struct srpt_device *sdev)
652 {
653         int i;
654
655         for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
656                 srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
657                 sdev->ioctx_ring[i] = NULL;
658         }
659 }
660
661 /*
662  * Post a receive request on the SRQ (shared receive queue) of device 'sdev'.
663  */
664 static int srpt_post_recv(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
665 {
666         struct ib_sge list;
667         struct ib_recv_wr wr, *bad_wr;
668
669         wr.wr_id = ioctx->index | SRPT_OP_RECV;
670
671         list.addr = ioctx->dma;
672         list.length = MAX_MESSAGE_SIZE;
673         list.lkey = sdev->mr->lkey;
674
675         wr.next = NULL;
676         wr.sg_list = &list;
677         wr.num_sge = 1;
678
679         return ib_post_srq_recv(sdev->srq, &wr, &bad_wr);
680 }
681
682 /*
683  * Post an IB send request.
684  * @ch: RDMA channel to post the send request on.
685  * @ioctx: I/O context of the send request.
686  * @len: length of the request to be sent in bytes.
687  *
688  * Returns zero upon success and a non-zero value upon failure.
689  */
690 static int srpt_post_send(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
691                           int len)
692 {
693         struct ib_sge list;
694         struct ib_send_wr wr, *bad_wr;
695         struct srpt_device *sdev = ch->sport->sdev;
696
697         dma_sync_single_for_device(sdev->device->dma_device, ioctx->dma,
698                                    MAX_MESSAGE_SIZE, DMA_TO_DEVICE);
699
700         list.addr = ioctx->dma;
701         list.length = len;
702         list.lkey = sdev->mr->lkey;
703
704         wr.next = NULL;
705         wr.wr_id = ioctx->index;
706         wr.sg_list = &list;
707         wr.num_sge = 1;
708         wr.opcode = IB_WR_SEND;
709         wr.send_flags = IB_SEND_SIGNALED;
710
711         return ib_post_send(ch->qp, &wr, &bad_wr);
712 }
713
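/*
 * Extract the data buffer descriptors from an SRP_CMD information unit. A
 * direct descriptor is copied into ioctx->single_rbuf; for an indirect
 * descriptor the embedded descriptor list is copied into an array that is
 * allocated if it holds more than one entry. *ind is set to one and no
 * buffers are stored if the descriptor list is only partially present in
 * the IU. Returns zero upon success or -ENOMEM upon allocation failure.
 */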
714 static int srpt_get_desc_tbl(struct srpt_ioctx *ioctx, struct srp_cmd *srp_cmd,
715                              int *ind)
716 {
717         struct srp_indirect_buf *idb;
718         struct srp_direct_buf *db;
719
720         *ind = 0;
721         if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) ||
722             ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) {
723                 ioctx->n_rbuf = 1;
724                 ioctx->rbufs = &ioctx->single_rbuf;
725
726                 db = (void *)srp_cmd->add_data;
727                 memcpy(ioctx->rbufs, db, sizeof *db);
728                 ioctx->data_len = be32_to_cpu(db->len);
729         } else {
730                 idb = (void *)srp_cmd->add_data;
731
732                 ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof *db;
733
734                 if (ioctx->n_rbuf >
735                     (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
736                         *ind = 1;
737                         ioctx->n_rbuf = 0;
738                         goto out;
739                 }
740
741                 if (ioctx->n_rbuf == 1)
742                         ioctx->rbufs = &ioctx->single_rbuf;
743                 else
744                         ioctx->rbufs =
745                                 kmalloc(ioctx->n_rbuf * sizeof *db, GFP_ATOMIC);
746                 if (!ioctx->rbufs) {
747                         ioctx->n_rbuf = 0;
748                         return -ENOMEM;
749                 }
750
751                 db = idb->desc_list;
752                 memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof *db);
753                 ioctx->data_len = be32_to_cpu(idb->len);
754         }
755 out:
756         return 0;
757 }
758
759 /*
760  * Modify the attributes of queue pair 'qp': allow local write, remote read,
761  * and remote write. Also transition 'qp' to state IB_QPS_INIT.
762  */
763 static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
764 {
765         struct ib_qp_attr *attr;
766         int ret;
767
768         attr = kzalloc(sizeof *attr, GFP_KERNEL);
769         if (!attr)
770                 return -ENOMEM;
771
772         attr->qp_state = IB_QPS_INIT;
773         attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
774             IB_ACCESS_REMOTE_WRITE;
775         attr->port_num = ch->sport->port;
776         attr->pkey_index = 0;
777
778         ret = ib_modify_qp(qp, attr,
779                            IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PORT |
780                            IB_QP_PKEY_INDEX);
781
782         kfree(attr);
783         return ret;
784 }
785
786 /**
787  * Change the state of a channel to 'ready to receive' (RTR).
788  * @ch: channel of the queue pair.
789  * @qp: queue pair to change the state of.
790  *
791  * Returns zero upon success and a negative value upon failure.
792  */
793 static int srpt_ch_qp_rtr(struct srpt_rdma_ch *ch, struct ib_qp *qp)
794 {
795         struct ib_qp_attr qp_attr;
796         int attr_mask;
797         int ret;
798
799         qp_attr.qp_state = IB_QPS_RTR;
800         ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask);
801         if (ret)
802                 goto out;
803
804         qp_attr.max_dest_rd_atomic = 4;
805
806         ret = ib_modify_qp(qp, &qp_attr, attr_mask);
807
808 out:
809         return ret;
810 }
811
812 /**
813  * Change the state of a channel to 'ready to send' (RTS).
814  * @ch: channel of the queue pair.
815  * @qp: queue pair to change the state of.
816  *
817  * Returns zero upon success and a negative value upon failure.
818  */
819 static int srpt_ch_qp_rts(struct srpt_rdma_ch *ch, struct ib_qp *qp)
820 {
821         struct ib_qp_attr qp_attr;
822         int attr_mask;
823         int ret;
824
825         qp_attr.qp_state = IB_QPS_RTS;
826         ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask);
827         if (ret)
828                 goto out;
829
830         qp_attr.max_rd_atomic = 4;
831
832         ret = ib_modify_qp(qp, &qp_attr, attr_mask);
833
834 out:
835         return ret;
836 }
837
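/*
 * Release the RDMA and data buffer resources associated with I/O context
 * 'ioctx' and repost it on the SRQ so that it can receive a new request.
 */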
838 static void srpt_reset_ioctx(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
839 {
840         int i;
841
842         if (ioctx->n_rdma_ius > 0 && ioctx->rdma_ius) {
843                 struct rdma_iu *riu = ioctx->rdma_ius;
844
845                 for (i = 0; i < ioctx->n_rdma_ius; ++i, ++riu)
846                         kfree(riu->sge);
847                 kfree(ioctx->rdma_ius);
848         }
849
850         if (ioctx->n_rbuf > 1)
851                 kfree(ioctx->rbufs);
852
853         if (srpt_post_recv(ch->sport->sdev, ioctx))
854                 /* we should queue it back to the free_ioctx queue */
855                 printk(KERN_ERR PFX "SRQ post_recv failed - this is serious\n");
856         else
857                 atomic_inc(&ch->req_lim_delta);
858 }
859
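/*
 * Abort processing of SCST command 'scmnd': unmap its data buffers and,
 * depending on the I/O context state, either abort the ongoing data transfer
 * via scst_rx_data() or complete the command with a delivery failure status.
 * 'tell_initiator' selects whether scst_rx_data() is called with
 * SCST_RX_STATUS_ERROR or SCST_RX_STATUS_ERROR_FATAL.
 */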
860 static void srpt_abort_scst_cmd(struct srpt_device *sdev,
861                                 struct scst_cmd *scmnd,
862                                 bool tell_initiator)
863 {
864         struct srpt_ioctx *ioctx;
865         scst_data_direction dir;
866
867         ioctx = scst_cmd_get_tgt_priv(scmnd);
868         BUG_ON(!ioctx);
869         dir = scst_cmd_get_data_direction(scmnd);
870         if (dir != SCST_DATA_NONE) {
871                 dma_unmap_sg(sdev->device->dma_device,
872                              scst_cmd_get_sg(scmnd),
873                              scst_cmd_get_sg_cnt(scmnd),
874                              scst_to_tgt_dma_dir(dir));
875
876                 if (ioctx->state == SRPT_STATE_NEED_DATA) {
877                         scst_rx_data(scmnd,
878                                      tell_initiator ? SCST_RX_STATUS_ERROR
879                                      : SCST_RX_STATUS_ERROR_FATAL,
880                                      SCST_CONTEXT_THREAD);
881                         goto out;
882                 } else if (ioctx->state == SRPT_STATE_PROCESSED)
883                         ;
884                 else {
885                         printk(KERN_ERR PFX
886                                "unexpected cmd state %d (SCST) %d (SRPT)\n",
887                                scmnd->state, ioctx->state);
888                         WARN_ON("unexpected cmd state");
889                 }
890         }
891
892         scst_set_delivery_status(scmnd, SCST_CMD_DELIVERY_FAILED);
893         scst_tgt_cmd_done(scmnd, scst_estimate_context());
894 out:
895         return;
896 }
897
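/*
 * Process a work completion that finished with an error status. A failed
 * receive indicates a problem with the SRQ; a failed send either aborts the
 * associated SCST command or reposts the I/O context on the SRQ.
 */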
898 static void srpt_handle_err_comp(struct srpt_rdma_ch *ch, struct ib_wc *wc)
899 {
900         struct srpt_ioctx *ioctx;
901         struct srpt_device *sdev = ch->sport->sdev;
902
903         if (wc->wr_id & SRPT_OP_RECV) {
904                 ioctx = sdev->ioctx_ring[wc->wr_id & ~SRPT_OP_RECV];
905                 printk(KERN_ERR PFX "This is serious - SRQ is in bad state\n");
906         } else {
907                 ioctx = sdev->ioctx_ring[wc->wr_id];
908
909                 if (ioctx->scmnd)
910                         srpt_abort_scst_cmd(sdev, ioctx->scmnd, true);
911                 else
912                         srpt_reset_ioctx(ch, ioctx);
913         }
914 }
915
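/*
 * Process a send completion: unmap the data buffers and finish the associated
 * SCST command if there is one, otherwise repost the I/O context on the SRQ.
 */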
916 static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
917                                   struct srpt_ioctx *ioctx,
918                                   enum scst_exec_context context)
919 {
920         if (ioctx->scmnd) {
921                 scst_data_direction dir =
922                         scst_cmd_get_data_direction(ioctx->scmnd);
923
924                 if (dir != SCST_DATA_NONE)
925                         dma_unmap_sg(ch->sport->sdev->device->dma_device,
926                                      scst_cmd_get_sg(ioctx->scmnd),
927                                      scst_cmd_get_sg_cnt(ioctx->scmnd),
928                                      scst_to_tgt_dma_dir(dir));
929
930                 scst_tgt_cmd_done(ioctx->scmnd, context);
931         } else
932                 srpt_reset_ioctx(ch, ioctx);
933 }
934
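/*
 * Process an RDMA read or write completion: for a write command, notify the
 * SCST core that the data transfer from the initiator has finished. An I/O
 * context without an associated SCST command is reposted on the SRQ.
 */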
935 static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
936                                   struct srpt_ioctx *ioctx)
937 {
938         if (!ioctx->scmnd) {
939                 srpt_reset_ioctx(ch, ioctx);
940                 return;
941         }
942
943         if (scst_cmd_get_data_direction(ioctx->scmnd) == SCST_DATA_WRITE)
944                 scst_rx_data(ioctx->scmnd, SCST_RX_STATUS_SUCCESS,
945                         scst_estimate_context());
946 }
947
948 /**
949  * Build an SRP_RSP response PDU.
950  * @ch: RDMA channel through which the request has been received.
951  * @ioctx: I/O context in which the SRP_RSP PDU will be built.
952  * @s_key: sense key that will be stored in the response.
953  * @s_code: value that will be stored in the asc_ascq field of the sense data.
954  * @tag: tag of the request for which this response is being generated.
955  *
956  * Returns the size in bytes of the SRP_RSP response PDU.
957  *
958  * An SRP_RSP PDU contains a SCSI status or service response. See also
959  * section 6.9 in the T10 SRP r16a document for the format of an SRP_RSP PDU.
960  * See also SPC-2 for more information about sense data.
961  */
962 static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
963                               struct srpt_ioctx *ioctx, u8 s_key, u8 s_code,
964                               u64 tag)
965 {
966         struct srp_rsp *srp_rsp;
967         struct sense_data *sense;
968         int limit_delta;
969         int sense_data_len = 0;
970
971         srp_rsp = ioctx->buf;
972         memset(srp_rsp, 0, sizeof *srp_rsp);
973
974         limit_delta = atomic_read(&ch->req_lim_delta);
975         atomic_sub(limit_delta, &ch->req_lim_delta);
976
977         srp_rsp->opcode = SRP_RSP;
978         srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
979         srp_rsp->tag = tag;
980
981         if (s_key != NO_SENSE) {
982                 sense_data_len = sizeof *sense + (sizeof *sense % 4);
983                 srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
984                 srp_rsp->status = SAM_STAT_CHECK_CONDITION;
985                 srp_rsp->sense_data_len = cpu_to_be32(sense_data_len);
986
987                 sense = (struct sense_data *)(srp_rsp + 1);
988                 sense->err_code = 0x70;
989                 sense->key = s_key;
990                 sense->asc_ascq = s_code;
991         }
992
993         return sizeof(*srp_rsp) + sense_data_len;
994 }
995
996 /**
997  * Build a task management response, which is a specific SRP_RSP response PDU.
998  * @ch: RDMA channel through which the request has been received.
999  * @ioctx: I/O context in which the SRP_RSP PDU will be built.
1000  * @rsp_code: RSP_CODE that will be stored in the response.
1001  * @tag: tag of the request for which this response is being generated.
1002  *
1003  * Returns the size in bytes of the SRP_RSP response PDU.
1004  *
1005  * An SRP_RSP PDU contains a SCSI status or service response. See also
1006  * section 6.9 in the T10 SRP r16a document for the format of an SRP_RSP PDU.
1007  */
1008 static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
1009                                   struct srpt_ioctx *ioctx, u8 rsp_code,
1010                                   u64 tag)
1011 {
1012         struct srp_rsp *srp_rsp;
1013         int limit_delta;
1014         int resp_data_len = 0;
1015
1016         dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
1017                                 MAX_MESSAGE_SIZE, DMA_TO_DEVICE);
1018
1019         srp_rsp = ioctx->buf;
1020         memset(srp_rsp, 0, sizeof *srp_rsp);
1021
1022         limit_delta = atomic_read(&ch->req_lim_delta);
1023         atomic_sub(limit_delta, &ch->req_lim_delta);
1024
1025         srp_rsp->opcode = SRP_RSP;
1026         srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
1027         srp_rsp->tag = tag;
1028
1029         if (rsp_code != SRP_TSK_MGMT_SUCCESS) {
1030                 resp_data_len = 4;
1031                 srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID;
1032                 srp_rsp->resp_data_len = cpu_to_be32(resp_data_len);
1033                 srp_rsp->data[3] = rsp_code;
1034         }
1035
1036         return sizeof(*srp_rsp) + resp_data_len;
1037 }
1038
1039 /*
1040  * Process an SRP_CMD information unit and pass the SCSI command to the SCST core.
1041  */
1042 static int srpt_handle_cmd(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
1043 {
1044         struct scst_cmd *scmnd;
1045         struct srp_cmd *srp_cmd;
1046         struct srp_rsp *srp_rsp;
1047         scst_data_direction dir = SCST_DATA_NONE;
1048         int indirect_desc = 0;
1049         int ret;
1050         unsigned long flags;
1051
1052         srp_cmd = ioctx->buf;
1053         srp_rsp = ioctx->buf;
1054
1055         if (srp_cmd->buf_fmt) {
1056                 ret = srpt_get_desc_tbl(ioctx, srp_cmd, &indirect_desc);
1057                 if (ret) {
1058                         srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
1059                                            NO_ADD_SENSE, srp_cmd->tag);
1060                         srp_rsp->status = SAM_STAT_TASK_SET_FULL;
1061                         goto err;
1062                 }
1063
1064                 if (indirect_desc) {
1065                         srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
1066                                            NO_ADD_SENSE, srp_cmd->tag);
1067                         srp_rsp->status = SAM_STAT_TASK_SET_FULL;
1068                         goto err;
1069                 }
1070
1071                 if (srp_cmd->buf_fmt & 0xf)
1072                         dir = SCST_DATA_READ;
1073                 else if (srp_cmd->buf_fmt >> 4)
1074                         dir = SCST_DATA_WRITE;
1075                 else
1076                         dir = SCST_DATA_NONE;
1077         } else
1078                 dir = SCST_DATA_NONE;
1079
1080         scmnd = scst_rx_cmd(ch->scst_sess, (u8 *) &srp_cmd->lun,
1081                             sizeof srp_cmd->lun, srp_cmd->cdb, 16,
1082                             thread ? SCST_NON_ATOMIC : SCST_ATOMIC);
1083         if (!scmnd) {
1084                 srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
1085                                    NO_ADD_SENSE, srp_cmd->tag);
1086                 srp_rsp->status = SAM_STAT_TASK_SET_FULL;
1087                 goto err;
1088         }
1089
1090         ioctx->scmnd = scmnd;
1091
1092         switch (srp_cmd->task_attr) {
1093         case SRP_CMD_HEAD_OF_Q:
1094                 scmnd->queue_type = SCST_CMD_QUEUE_HEAD_OF_QUEUE;
1095                 break;
1096         case SRP_CMD_ORDERED_Q:
1097                 scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
1098                 break;
1099         case SRP_CMD_SIMPLE_Q:
1100                 scmnd->queue_type = SCST_CMD_QUEUE_SIMPLE;
1101                 break;
1102         case SRP_CMD_ACA:
1103                 scmnd->queue_type = SCST_CMD_QUEUE_ACA;
1104                 break;
1105         default:
1106                 scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
1107                 break;
1108         }
1109
1110         scst_cmd_set_tag(scmnd, srp_cmd->tag);
1111         scst_cmd_set_tgt_priv(scmnd, ioctx);
1112         scst_cmd_set_expected(scmnd, dir, ioctx->data_len);
1113
1114         spin_lock_irqsave(&ch->spinlock, flags);
1115         list_add_tail(&ioctx->scmnd_list, &ch->active_scmnd_list);
1116         ch->active_scmnd_cnt++;
1117         spin_unlock_irqrestore(&ch->spinlock, flags);
1118
1119         scst_cmd_init_done(scmnd, scst_estimate_context());
1120
1121         WARN_ON(srp_rsp->opcode == SRP_RSP);
1122
1123         return 0;
1124
1125 err:
1126         WARN_ON(srp_rsp->opcode != SRP_RSP);
1127
1128         return -1;
1129 }
1130
1131 /*
1132  * Process an SRP_TSK_MGMT request PDU.
1133  *
1134  * Returns 0 upon success and -1 upon failure.
1135  *
1136  * Each task management function is performed by calling one of the
1137  * scst_rx_mgmt_fn*() functions. These functions will either report failure
1138  * or process the task management function asynchronously. The function
1139  * srpt_tsk_mgmt_done() will be called by the SCST core upon completion of the
1140  * task management function. When srpt_handle_tsk_mgmt() reports failure
1141  * (i.e. returns -1) a response PDU will have been built in ioctx->buf. This
1142  * PDU has to be sent back by the caller.
1143  *
1144  * For more information about SRP_TSK_MGMT PDU's, see also section 6.7 in
1145  * the T10 SRP r16a document.
1146  */
1147 static int srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
1148                                 struct srpt_ioctx *ioctx)
1149 {
1150         struct srp_tsk_mgmt *srp_tsk;
1151         struct srpt_mgmt_ioctx *mgmt_ioctx;
1152         int ret;
1153
1154         srp_tsk = ioctx->buf;
1155
1156         TRACE_DBG("recv_tsk_mgmt= %d for task_tag= %lld"
1157                   " using tag= %lld cm_id= %p sess= %p",
1158                   srp_tsk->tsk_mgmt_func,
1159                   (unsigned long long) srp_tsk->task_tag,
1160                   (unsigned long long) srp_tsk->tag,
1161                   ch->cm_id, ch->scst_sess);
1162
1163         mgmt_ioctx = kmalloc(sizeof *mgmt_ioctx, GFP_ATOMIC);
1164         if (!mgmt_ioctx) {
1165                 srpt_build_tskmgmt_rsp(ch, ioctx, SRP_TSK_MGMT_FAILED,
1166                                        srp_tsk->tag);
1167                 goto err;
1168         }
1169
1170         mgmt_ioctx->ioctx = ioctx;
1171         mgmt_ioctx->ch = ch;
1172         mgmt_ioctx->tag = srp_tsk->tag;
1173
1174         switch (srp_tsk->tsk_mgmt_func) {
1175         case SRP_TSK_ABORT_TASK:
1176                 TRACE_DBG("%s", "Processing SRP_TSK_ABORT_TASK");
1177                 ret = scst_rx_mgmt_fn_tag(ch->scst_sess,
1178                                           SCST_ABORT_TASK,
1179                                           srp_tsk->task_tag,
1180                                           thread ?
1181                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1182                                           mgmt_ioctx);
1183                 break;
1184         case SRP_TSK_ABORT_TASK_SET:
1185                 TRACE_DBG("%s", "Processing SRP_TSK_ABORT_TASK_SET");
1186                 ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
1187                                           SCST_ABORT_TASK_SET,
1188                                           (u8 *) &srp_tsk->lun,
1189                                           sizeof srp_tsk->lun,
1190                                           thread ?
1191                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1192                                           mgmt_ioctx);
1193                 break;
1194         case SRP_TSK_CLEAR_TASK_SET:
1195                 TRACE_DBG("%s", "Processing SRP_TSK_CLEAR_TASK_SET");
1196                 ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
1197                                           SCST_CLEAR_TASK_SET,
1198                                           (u8 *) &srp_tsk->lun,
1199                                           sizeof srp_tsk->lun,
1200                                           thread ?
1201                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1202                                           mgmt_ioctx);
1203                 break;
1204         case SRP_TSK_LUN_RESET:
1205                 TRACE_DBG("%s", "Processing SRP_TSK_LUN_RESET");
1206                 ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
1207                                           SCST_LUN_RESET,
1208                                           (u8 *) &srp_tsk->lun,
1209                                           sizeof srp_tsk->lun,
1210                                           thread ?
1211                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1212                                           mgmt_ioctx);
1213                 break;
1214         case SRP_TSK_CLEAR_ACA:
1215                 TRACE_DBG("%s", "Processing SRP_TSK_CLEAR_ACA");
1216                 ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
1217                                           SCST_CLEAR_ACA,
1218                                           (u8 *) &srp_tsk->lun,
1219                                           sizeof srp_tsk->lun,
1220                                           thread ?
1221                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1222                                           mgmt_ioctx);
1223                 break;
1224         default:
1225                 TRACE_DBG("%s", "Unsupported task management function.");
1226                 srpt_build_tskmgmt_rsp(ch, ioctx,
1227                                        SRP_TSK_MGMT_FUNC_NOT_SUPP,
1228                                        srp_tsk->tag);
1229                 goto err;
1230         }
1231
1232         if (ret) {
1233                 TRACE_DBG("%s", "Processing task management function failed.");
1234                 srpt_build_tskmgmt_rsp(ch, ioctx, SRP_TSK_MGMT_FAILED,
1235                                        srp_tsk->tag);
1236                 goto err;
1237         }
1238
1239         WARN_ON(srp_tsk->opcode == SRP_RSP);
1240
1241         return 0;
1242
1243 err:
1244         WARN_ON(srp_tsk->opcode != SRP_RSP);
1245
1246         kfree(mgmt_ioctx);
1247         return -1;
1248 }
1249
1250 /**
1251  * Process a receive completion event.
1252  * @ch: RDMA channel for which the completion event has been received.
1253  * @ioctx: SRPT I/O context for which the completion event has been received.
1254  */
1255 static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
1256                                struct srpt_ioctx *ioctx)
1257 {
1258         struct srp_cmd *srp_cmd;
1259         struct srp_rsp *srp_rsp;
1260         unsigned long flags;
1261         int len;
1262
1263         spin_lock_irqsave(&ch->spinlock, flags);
1264         if (ch->state != RDMA_CHANNEL_LIVE) {
1265                 if (ch->state == RDMA_CHANNEL_CONNECTING) {
1266                         list_add_tail(&ioctx->wait_list, &ch->cmd_wait_list);
1267                         spin_unlock_irqrestore(&ch->spinlock, flags);
1268                         return;
1269                 } else {
1270                         spin_unlock_irqrestore(&ch->spinlock, flags);
1271                         srpt_reset_ioctx(ch, ioctx);
1272                         return;
1273                 }
1274         }
1275         spin_unlock_irqrestore(&ch->spinlock, flags);
1276
1277         dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
1278                                 MAX_MESSAGE_SIZE, DMA_FROM_DEVICE);
1279
1280         ioctx->data_len = 0;
1281         ioctx->n_rbuf = 0;
1282         ioctx->rbufs = NULL;
1283         ioctx->n_rdma = 0;
1284         ioctx->n_rdma_ius = 0;
1285         ioctx->rdma_ius = NULL;
1286         ioctx->scmnd = NULL;
1287         ioctx->state = SRPT_STATE_NEW;
1288
1289         srp_cmd = ioctx->buf;
1290         srp_rsp = ioctx->buf;
1291
1292         switch (srp_cmd->opcode) {
1293         case SRP_CMD:
1294                 if (srpt_handle_cmd(ch, ioctx) < 0)
1295                         goto err;
1296                 break;
1297
1298         case SRP_TSK_MGMT:
1299                 if (srpt_handle_tsk_mgmt(ch, ioctx) < 0)
1300                         goto err;
1301                 break;
1302
1303         case SRP_I_LOGOUT:
1304         case SRP_AER_REQ:
1305         default:
1306                 srpt_build_cmd_rsp(ch, ioctx, ILLEGAL_REQUEST, INVALID_CDB,
1307                                    srp_cmd->tag);
1308                 goto err;
1309         }
1310
1311         WARN_ON(srp_rsp->opcode == SRP_RSP);
1312
1313         dma_sync_single_for_device(ch->sport->sdev->device->dma_device,
1314                                    ioctx->dma, MAX_MESSAGE_SIZE,
1315                                    DMA_FROM_DEVICE);
1316
1317         return;
1318
1319 err:
1320         WARN_ON(srp_rsp->opcode != SRP_RSP);
1321         len = (sizeof *srp_rsp) + be32_to_cpu(srp_rsp->sense_data_len);
1322
1323         if (ch->state != RDMA_CHANNEL_LIVE) {
1324                 /* Give up if another thread modified the channel state. */
1325                 printk(KERN_ERR PFX "%s: channel is in state %d\n",
1326                        __func__, ch->state);
1327                 srpt_reset_ioctx(ch, ioctx);
1328         } else if (srpt_post_send(ch, ioctx, len)) {
1329                 printk(KERN_ERR PFX "%s: sending SRP_RSP PDU failed\n",
1330                        __func__);
1331                 srpt_reset_ioctx(ch, ioctx);
1332         }
1333 }
1334
1335 /*
1336  * Returns true if the ioctx list is non-empty or if the ib_srpt kernel thread
1337  * should stop.
1338  * @pre thread != 0
1339  */
1340 static inline int srpt_test_ioctx_list(void)
1341 {
1342         int res = (!list_empty(&srpt_thread.thread_ioctx_list) ||
1343                    unlikely(kthread_should_stop()));
1344         return res;
1345 }
1346
1347 /*
1348  * Add 'ioctx' to the tail of the ioctx list and wake up the kernel thread.
1349  *
1350  * @pre thread != 0
1351  */
1352 static inline void srpt_schedule_thread(struct srpt_ioctx *ioctx)
1353 {
1354         unsigned long flags;
1355
1356         spin_lock_irqsave(&srpt_thread.thread_lock, flags);
1357         list_add_tail(&ioctx->comp_list, &srpt_thread.thread_ioctx_list);
1358         spin_unlock_irqrestore(&srpt_thread.thread_lock, flags);
1359         wake_up(&ioctx_list_waitQ);
1360 }
1361
1362 /**
1363  * InfiniBand completion queue callback function.
1364  * @cq: completion queue.
1365  * @ctx: completion queue context, which was passed as the fourth argument of
1366  *       the function ib_create_cq().
1367  */
1368 static void srpt_completion(struct ib_cq *cq, void *ctx)
1369 {
1370         struct srpt_rdma_ch *ch = ctx;
1371         struct srpt_device *sdev = ch->sport->sdev;
1372         struct ib_wc wc;
1373         struct srpt_ioctx *ioctx;
1374
1375         ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
1376         while (ib_poll_cq(ch->cq, 1, &wc) > 0) {
1377                 if (wc.status) {
1378                         printk(KERN_ERR PFX "failed %s status= %d\n",
1379                                wc.wr_id & SRPT_OP_RECV ? "receive" : "send",
1380                                wc.status);
1381                         srpt_handle_err_comp(ch, &wc);
1382                         break;
1383                 }
1384
1385                 if (wc.wr_id & SRPT_OP_RECV) {
1386                         ioctx = sdev->ioctx_ring[wc.wr_id & ~SRPT_OP_RECV];
1387                         if (thread) {
1388                                 ioctx->ch = ch;
1389                                 ioctx->op = IB_WC_RECV;
1390                                 srpt_schedule_thread(ioctx);
1391                         } else
1392                                 srpt_handle_new_iu(ch, ioctx);
1393                         continue;
1394                 } else
1395                         ioctx = sdev->ioctx_ring[wc.wr_id];
1396
1397                 if (thread) {
1398                         ioctx->ch = ch;
1399                         ioctx->op = wc.opcode;
1400                         srpt_schedule_thread(ioctx);
1401                 } else {
1402                         switch (wc.opcode) {
1403                         case IB_WC_SEND:
1404                                 srpt_handle_send_comp(ch, ioctx,
1405                                         scst_estimate_context());
1406                                 break;
1407                         case IB_WC_RDMA_WRITE:
1408                         case IB_WC_RDMA_READ:
1409                                 srpt_handle_rdma_comp(ch, ioctx);
1410                                 break;
1411                         default:
1412                                 break;
1413                         }
1414                 }
1415         }
1416 }
1417
1418 /*
1419  * Create a completion queue and a queue pair for the specified RDMA channel.
1420  */
1421 static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
1422 {
1423         struct ib_qp_init_attr *qp_init;
1424         struct srpt_device *sdev = ch->sport->sdev;
1425         int cqe;
1426         int ret;
1427
1428         qp_init = kzalloc(sizeof *qp_init, GFP_KERNEL);
1429         if (!qp_init)
1430                 return -ENOMEM;
1431
1432         /* Create a completion queue (CQ). */
1433
1434         cqe = SRPT_RQ_SIZE + SRPT_SQ_SIZE - 1;
1435 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(RHEL_RELEASE_CODE)
1436         ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe);
1437 #else
1438         ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe, 0);
1439 #endif
1440         if (IS_ERR(ch->cq)) {
1441                 ret = PTR_ERR(ch->cq);
1442                 printk(KERN_ERR PFX "failed to create_cq cqe= %d ret= %d\n",
1443                         cqe, ret);
1444                 goto out;
1445         }
1446
1447         /* Request completion notification. */
1448
1449         ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
1450
1451         /* Create a queue pair (QP). */
1452
1453         qp_init->qp_context = (void *)ch;
1454         qp_init->event_handler = srpt_qp_event;
1455         qp_init->send_cq = ch->cq;
1456         qp_init->recv_cq = ch->cq;
1457         qp_init->srq = sdev->srq;
1458         qp_init->sq_sig_type = IB_SIGNAL_REQ_WR;
1459         qp_init->qp_type = IB_QPT_RC;
1460         qp_init->cap.max_send_wr = SRPT_SQ_SIZE;
1461         qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE;
1462
1463         ch->qp = ib_create_qp(sdev->pd, qp_init);
1464         if (IS_ERR(ch->qp)) {
1465                 ret = PTR_ERR(ch->qp);
1466                 ib_destroy_cq(ch->cq);
1467                 printk(KERN_ERR PFX "failed to create_qp ret= %d\n", ret);
1468                 goto out;
1469         }
1470
1471         TRACE_DBG("%s: max_cqe= %d max_sge= %d cm_id= %p",
1472                __func__, ch->cq->cqe, qp_init->cap.max_send_sge,
1473                ch->cm_id);
1474
1475         /* Modify the attributes and the state of queue pair ch->qp. */
1476
1477         ret = srpt_init_ch_qp(ch, ch->qp);
1478         if (ret) {
1479                 ib_destroy_qp(ch->qp);
1480                 ib_destroy_cq(ch->cq);
1481                 goto out;
1482         }
1483
1484         atomic_set(&ch->req_lim_delta, SRPT_RQ_SIZE);
1485 out:
1486         kfree(qp_init);
1487         return ret;
1488 }
1489
1490 /**
1491  * Look up the RDMA channel that corresponds to the specified cm_id.
1492  *
1493  * Return NULL if no matching RDMA channel has been found.
1494  */
1495 static struct srpt_rdma_ch *srpt_find_channel(struct ib_cm_id *cm_id, bool del)
1496 {
1497         struct srpt_device *sdev = cm_id->context;
1498         struct srpt_rdma_ch *ch;
1499
1500         spin_lock_irq(&sdev->spinlock);
1501         list_for_each_entry(ch, &sdev->rch_list, list) {
1502                 if (ch->cm_id == cm_id) {
1503                         if (del)
1504                                 list_del(&ch->list);
1505                         spin_unlock_irq(&sdev->spinlock);
1506                         return ch;
1507                 }
1508         }
1509
1510         spin_unlock_irq(&sdev->spinlock);
1511
1512         return NULL;
1513 }
1514
1515 /**
1516  * Release all resources associated with the specified RDMA channel.
1517  *
1518  * Note: the caller must have removed the channel from the channel list
1519  * before calling this function.
1520  */
1521 static void srpt_release_channel(struct srpt_rdma_ch *ch, int destroy_cmid)
1522 {
1523         TRACE_ENTRY();
1524
1525         WARN_ON(srpt_find_channel(ch->cm_id, false) == ch);
1526
1527         if (ch->cm_id && destroy_cmid) {
1528                 TRACE_DBG("%s: destroy cm_id= %p", __func__, ch->cm_id);
1529                 ib_destroy_cm_id(ch->cm_id);
1530                 ch->cm_id = NULL;
1531         }
1532
1533         ib_destroy_qp(ch->qp);
1534         ib_destroy_cq(ch->cq);
1535
1536         if (ch->scst_sess) {
1537                 struct srpt_ioctx *ioctx, *ioctx_tmp;
1538
1539                 TRACE_DBG("%s: release sess= %p sess_name= %s active_cmd= %d",
1540                           __func__, ch->scst_sess, ch->sess_name,
1541                           ch->active_scmnd_cnt);
1542
1543                 spin_lock_irq(&ch->spinlock);
1544                 list_for_each_entry_safe(ioctx, ioctx_tmp,
1545                                          &ch->active_scmnd_list, scmnd_list) {
1546                         spin_unlock_irq(&ch->spinlock);
1547
1548                         if (ioctx->scmnd)
1549                                 srpt_abort_scst_cmd(ch->sport->sdev,
1550                                                     ioctx->scmnd, true);
1551
1552                         spin_lock_irq(&ch->spinlock);
1553                 }
1554                 WARN_ON(!list_empty(&ch->active_scmnd_list));
1555                 WARN_ON(ch->active_scmnd_cnt != 0);
1556                 spin_unlock_irq(&ch->spinlock);
1557
1558                 scst_unregister_session(ch->scst_sess, 0, NULL);
1559                 ch->scst_sess = NULL;
1560         }
1561
1562         kfree(ch);
1563
1564         TRACE_EXIT();
1565 }
1566
1567 static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
1568                             struct ib_cm_req_event_param *param,
1569                             void *private_data)
1570 {
1571         struct srpt_device *sdev = cm_id->context;
1572         struct srp_login_req *req;
1573         struct srp_login_rsp *rsp;
1574         struct srp_login_rej *rej;
1575         struct ib_cm_rep_param *rep_param;
1576         struct srpt_rdma_ch *ch, *tmp_ch;
1577         u32 it_iu_len;
1578         int ret = 0;
1579
1580         if (!sdev || !private_data)
1581                 return -EINVAL;
1582
1583         rsp = kzalloc(sizeof *rsp, GFP_KERNEL);
1584         rej = kzalloc(sizeof *rej, GFP_KERNEL);
1585         rep_param = kzalloc(sizeof *rep_param, GFP_KERNEL);
1586
1587         if (!rsp || !rej || !rep_param) {
1588                 ret = -ENOMEM;
1589                 goto out;
1590         }
1591
1592         req = (struct srp_login_req *)private_data;
1593
1594         it_iu_len = be32_to_cpu(req->req_it_iu_len);
1595
1596         TRACE_DBG("Host login i_port_id=0x%llx:0x%llx t_port_id=0x%llx:0x%llx"
1597             " it_iu_len=%d",
1598             (unsigned long long)be64_to_cpu(*(u64 *)&req->initiator_port_id[0]),
1599             (unsigned long long)be64_to_cpu(*(u64 *)&req->initiator_port_id[8]),
1600             (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[0]),
1601             (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[8]),
1602             it_iu_len);
1603
1604         if (it_iu_len > MAX_MESSAGE_SIZE || it_iu_len < 64) {
1605                 rej->reason =
1606                     cpu_to_be32(SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
1607                 ret = -EINVAL;
1608                 TRACE_DBG("Reject invalid it_iu_len=%d", it_iu_len);
1609                 goto reject;
1610         }
1611
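	/*
	 * SRP_MULTICHAN_SINGLE: the initiator requests that any existing
	 * channels for the same initiator/target port pair be terminated
	 * before the new channel is established (SRP MULTI-CHANNEL ACTION).
	 */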
1612         if ((req->req_flags & 0x3) == SRP_MULTICHAN_SINGLE) {
1613                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;
1614
1615                 spin_lock_irq(&sdev->spinlock);
1616
1617                 list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
1618                         if (!memcmp(ch->i_port_id, req->initiator_port_id, 16)
1619                             && !memcmp(ch->t_port_id, req->target_port_id, 16)
1620                             && param->port == ch->sport->port
1621                             && param->listen_id == ch->sport->sdev->cm_id
1622                             && ch->cm_id) {
1623                                 enum rdma_ch_state prev_state;
1624
1625                                 /* found an existing channel */
1626                                 TRACE_DBG("Found existing channel name= %s"
1627                                           " cm_id= %p state= %d",
1628                                           ch->sess_name, ch->cm_id, ch->state);
1629
1630                                 prev_state = ch->state;
1631                                 if (ch->state == RDMA_CHANNEL_LIVE)
1632                                         ch->state = RDMA_CHANNEL_DISCONNECTING;
1633                                 else if (ch->state == RDMA_CHANNEL_CONNECTING)
1634                                         list_del(&ch->list);
1635
1636                                 spin_unlock_irq(&sdev->spinlock);
1637
1638                                 rsp->rsp_flags =
1639                                         SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
1640
1641                                 if (prev_state == RDMA_CHANNEL_LIVE)
1642                                         ib_send_cm_dreq(ch->cm_id, NULL, 0);
1643                                 else if (prev_state ==
1644                                          RDMA_CHANNEL_CONNECTING) {
1645                                         ib_send_cm_rej(ch->cm_id,
1646                                                        IB_CM_REJ_NO_RESOURCES,
1647                                                        NULL, 0, NULL, 0);
1648                                         srpt_release_channel(ch, 1);
1649                                 }
1650
1651                                 spin_lock_irq(&sdev->spinlock);
1652                         }
1653                 }
1654
1655                 spin_unlock_irq(&sdev->spinlock);
1656
1657         } else
1658                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
1659
1660         if (((u64) (*(u64 *) req->target_port_id) !=
1661              cpu_to_be64(mellanox_ioc_guid)) ||
1662             ((u64) (*(u64 *) (req->target_port_id + 8)) !=
1663              cpu_to_be64(mellanox_ioc_guid))) {
1664                 rej->reason =
1665                     cpu_to_be32(SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
1666                 ret = -EINVAL;
1667                 TRACE_DBG("%s", "Reject invalid target_port_id");
1668                 goto reject;
1669         }
1670
1671         ch = kzalloc(sizeof *ch, GFP_KERNEL);
1672         if (!ch) {
1673                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1674                 TRACE_DBG("%s", "Reject failed allocate rdma_ch");
1675                 ret = -ENOMEM;
1676                 goto reject;
1677         }
1678
1679         spin_lock_init(&ch->spinlock);
1680         memcpy(ch->i_port_id, req->initiator_port_id, 16);
1681         memcpy(ch->t_port_id, req->target_port_id, 16);
1682         ch->sport = &sdev->port[param->port - 1];
1683         ch->cm_id = cm_id;
1684         ch->state = RDMA_CHANNEL_CONNECTING;
1685         INIT_LIST_HEAD(&ch->cmd_wait_list);
1686         INIT_LIST_HEAD(&ch->active_scmnd_list);
1687
1688         ret = srpt_create_ch_ib(ch);
1689         if (ret) {
1690                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1691                 TRACE_DBG("%s", "Reject failed to create rdma_ch");
1692                 goto free_ch;
1693         }
1694
1695         ret = srpt_ch_qp_rtr(ch, ch->qp);
1696         if (ret) {
1697                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1698                 TRACE_DBG("Reject failed qp to rtr/rts ret=%d", ret);
1699                 goto destroy_ib;
1700         }
1701
1702         snprintf(ch->sess_name, sizeof(ch->sess_name),
1703                  "0x%016llx%016llx",
1704                  (unsigned long long)be64_to_cpu(*(u64 *)ch->i_port_id),
1705                  (unsigned long long)be64_to_cpu(*(u64 *)(ch->i_port_id + 8)));
1706
1707         TRACE_DBG("registering session %s", ch->sess_name);
1708
1709         BUG_ON(!sdev->scst_tgt);
1710         ch->scst_sess = scst_register_session(sdev->scst_tgt, 0, ch->sess_name,
1711                                   NULL, NULL);
1712         if (!ch->scst_sess) {
1713                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1714                 TRACE_DBG("%s", "Failed to create scst sess");
1715                 goto destroy_ib;
1716         }
1717
1718         TRACE_DBG("Establish connection sess=%p name=%s cm_id=%p",
1719                   ch->scst_sess, ch->sess_name, ch->cm_id);
1720
1721         scst_sess_set_tgt_priv(ch->scst_sess, ch);
1722
1723         /* create srp_login_response */
1724         rsp->opcode = SRP_LOGIN_RSP;
1725         rsp->tag = req->tag;
1726         rsp->max_it_iu_len = req->req_it_iu_len;
1727         rsp->max_ti_iu_len = req->req_it_iu_len;
1728         rsp->buf_fmt =
1729             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
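	/*
	 * Return the initial request limit delta (SRPT_RQ_SIZE credits) to the
	 * initiator in the login response and reset the channel's delta counter.
	 */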
1730         rsp->req_lim_delta = cpu_to_be32(SRPT_RQ_SIZE);
1731         atomic_set(&ch->req_lim_delta, 0);
1732
1733         /* create cm reply */
1734         rep_param->qp_num = ch->qp->qp_num;
1735         rep_param->private_data = (void *)rsp;
1736         rep_param->private_data_len = sizeof *rsp;
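	/* An RNR retry count of 7 means "retry indefinitely" in InfiniBand. */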
1737         rep_param->rnr_retry_count = 7;
1738         rep_param->flow_control = 1;
1739         rep_param->failover_accepted = 0;
1740         rep_param->srq = 1;
1741         rep_param->responder_resources = 4;
1742         rep_param->initiator_depth = 4;
1743
1744         ret = ib_send_cm_rep(cm_id, rep_param);
1745         if (ret)
1746                 goto release_channel;
1747
1748         spin_lock_irq(&sdev->spinlock);
1749         list_add_tail(&ch->list, &sdev->rch_list);
1750         spin_unlock_irq(&sdev->spinlock);
1751
1752         goto out;
1753
1754 release_channel:
1755         scst_unregister_session(ch->scst_sess, 0, NULL);
1756         ch->scst_sess = NULL;
1757
1758 destroy_ib:
1759         ib_destroy_qp(ch->qp);
1760         ib_destroy_cq(ch->cq);
1761
1762 free_ch:
1763         kfree(ch);
1764
1765 reject:
1766         rej->opcode = SRP_LOGIN_REJ;
1767         rej->tag = req->tag;
1768         rej->buf_fmt =
1769             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
1770
1771         ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
1772                              (void *)rej, sizeof *rej);
1773
1774 out:
1775         kfree(rep_param);
1776         kfree(rsp);
1777         kfree(rej);
1778
1779         return ret;
1780 }
1781
1782 /**
1783  * Release the channel with the specified cm_id.
1784  *
1785  * The cm_id itself is not destroyed here; srpt_cm_handler() returns a
1786  * non-zero value for the corresponding events so that the IB CM destroys it.
1787  */
1788 static void srpt_find_and_release_channel(struct ib_cm_id *cm_id)
1789 {
1790         struct srpt_rdma_ch *ch;
1791
1792         ch = srpt_find_channel(cm_id, true);
1793         if (ch)
1794                 srpt_release_channel(ch, 0);
1795 }
1796
1797 static void srpt_cm_rej_recv(struct ib_cm_id *cm_id)
1798 {
1799         TRACE_DBG("%s: cm_id=%p", __func__, cm_id);
1800         srpt_find_and_release_channel(cm_id);
1801 }
1802
1803 /**
1804  * Process an IB_CM_RTU_RECEIVED or IB_CM_USER_ESTABLISHED event.
1805  *
1806  * An IB_CM_RTU_RECEIVED message indicates that the connection is established
1807  * and that the recipient may begin transmitting (RTU = ready to use).
1808  */
1809 static int srpt_cm_rtu_recv(struct ib_cm_id *cm_id)
1810 {
1811         struct srpt_rdma_ch *ch;
1812         int ret;
1813
1814         ch = srpt_find_channel(cm_id, false);
1815         if (!ch)
1816                 return -EINVAL;
1817
1818         if (srpt_test_and_set_channel_state(ch, RDMA_CHANNEL_CONNECTING,
1819                                             RDMA_CHANNEL_LIVE)) {
1820                 struct srpt_ioctx *ioctx, *ioctx_tmp;
1821
1822                 ret = srpt_ch_qp_rts(ch, ch->qp);
1823
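		/*
		 * Process any information units that were received before the
		 * channel reached the live state.
		 */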
1824                 list_for_each_entry_safe(ioctx, ioctx_tmp, &ch->cmd_wait_list,
1825                                          wait_list) {
1826                         list_del(&ioctx->wait_list);
1827                         srpt_handle_new_iu(ch, ioctx);
1828                 }
1829                 if (ret && srpt_test_and_set_channel_state(ch,
1830                                         RDMA_CHANNEL_LIVE,
1831                                         RDMA_CHANNEL_DISCONNECTING)) {
1832                         TRACE_DBG("cm_id=%p sess_name=%s state=%d",
1833                                   cm_id, ch->sess_name, ch->state);
1834                         ib_send_cm_dreq(ch->cm_id, NULL, 0);
1835                 }
1836         } else if (ch->state == RDMA_CHANNEL_DISCONNECTING) {
1837                 TRACE_DBG("cm_id=%p sess_name=%s state=%d",
1838                           cm_id, ch->sess_name, ch->state);
1839                 ib_send_cm_dreq(ch->cm_id, NULL, 0);
1840                 ret = -EAGAIN;
1841         } else
1842                 ret = 0;
1843
1844         return ret;
1845 }
1846
1847 static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
1848 {
1849         TRACE_DBG("%s: cm_id=%p", __func__, cm_id);
1850         srpt_find_and_release_channel(cm_id);
1851 }
1852
1853 static void srpt_cm_rep_error(struct ib_cm_id *cm_id)
1854 {
1855         TRACE_DBG("%s: cm_id=%p", __func__, cm_id);
1856         srpt_find_and_release_channel(cm_id);
1857 }
1858
1859 static int srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
1860 {
1861         struct srpt_rdma_ch *ch;
1862
1863         ch = srpt_find_channel(cm_id, false);
1864         if (!ch)
1865                 return -EINVAL;
1866
1867         TRACE_DBG("%s: cm_id= %p ch->state= %d",
1868                  __func__, cm_id, ch->state);
1869
1870         switch (ch->state) {
1871         case RDMA_CHANNEL_LIVE:
1872         case RDMA_CHANNEL_CONNECTING:
1873                 ib_send_cm_drep(ch->cm_id, NULL, 0);
1874                 break;
1875         case RDMA_CHANNEL_DISCONNECTING:
1876         default:
1877                 break;
1878         }
1879
1880         return 0;
1881 }
1882
1883 static void srpt_cm_drep_recv(struct ib_cm_id *cm_id)
1884 {
1885         TRACE_DBG("%s: cm_id=%p", __func__, cm_id);
1886         srpt_find_and_release_channel(cm_id);
1887 }
1888
1889 /**
1890  * IB connection manager callback function.
1891  *
1892  * A non-zero return value will make the caller destroy the CM ID.
1893  *
1894  * Note: srpt_add_one passes a struct srpt_device* as the third argument to
1895  * the ib_create_cm_id() call.
1896  */
1897 static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
1898 {
1899         int ret = 0;
1900
1901         switch (event->event) {
1902         case IB_CM_REQ_RECEIVED:
1903                 ret = srpt_cm_req_recv(cm_id, &event->param.req_rcvd,
1904                                        event->private_data);
1905                 break;
1906         case IB_CM_REJ_RECEIVED:
1907                 srpt_cm_rej_recv(cm_id);
1908                 ret = -EINVAL;
1909                 break;
1910         case IB_CM_RTU_RECEIVED:
1911         case IB_CM_USER_ESTABLISHED:
1912                 ret = srpt_cm_rtu_recv(cm_id);
1913                 break;
1914         case IB_CM_DREQ_RECEIVED:
1915                 ret = srpt_cm_dreq_recv(cm_id);
1916                 break;
1917         case IB_CM_DREP_RECEIVED:
1918                 srpt_cm_drep_recv(cm_id);
1919                 ret = -EINVAL;
1920                 break;
1921         case IB_CM_TIMEWAIT_EXIT:
1922                 srpt_cm_timewait_exit(cm_id);
1923                 ret = -EINVAL;
1924                 break;
1925         case IB_CM_REP_ERROR:
1926                 srpt_cm_rep_error(cm_id);
1927                 ret = -EINVAL;
1928                 break;
1929         default:
1930                 break;
1931         }
1932
1933         return ret;
1934 }
1935
1936 static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1937                                  struct srpt_ioctx *ioctx,
1938                                  struct scst_cmd *scmnd)
1939 {
1940         struct scatterlist *scat;
1941         scst_data_direction dir;
1942         struct rdma_iu *riu;
1943         struct srp_direct_buf *db;
1944         dma_addr_t dma_addr;
1945         struct ib_sge *sge;
1946         u64 raddr;
1947         u32 rsize;
1948         u32 tsize;
1949         u32 dma_len;
1950         int count, nrdma;
1951         int i, j, k;
1952
1953         scat = scst_cmd_get_sg(scmnd);
1954         dir = scst_cmd_get_data_direction(scmnd);
1955         count = dma_map_sg(ch->sport->sdev->device->dma_device, scat,
1956                            scst_cmd_get_sg_cnt(scmnd),
1957                            scst_to_tgt_dma_dir(dir));
1958         if (unlikely(!count))
1959                 return -EBUSY;
1960
1961         if (ioctx->rdma_ius && ioctx->n_rdma_ius)
1962                 nrdma = ioctx->n_rdma_ius;
1963         else {
1964                 nrdma = count / SRPT_DEF_SG_PER_WQE + ioctx->n_rbuf;
1965
1966                 ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu,
1967                                           scst_cmd_atomic(scmnd)
1968                                           ? GFP_ATOMIC : GFP_KERNEL);
1969                 if (!ioctx->rdma_ius) {
1970                         dma_unmap_sg(ch->sport->sdev->device->dma_device,
1971                                      scat, scst_cmd_get_sg_cnt(scmnd),
1972                                      scst_to_tgt_dma_dir(dir));
1973                         return -ENOMEM;
1974                 }
1975
1976                 ioctx->n_rdma_ius = nrdma;
1977         }
1978
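	/*
	 * The mapping is done in two passes: the first pass counts the ib_sge
	 * entries needed per RDMA work request and allocates the per-rdma_iu
	 * sge arrays; the second pass fills them in.
	 */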
1979         db = ioctx->rbufs;
1980         tsize = (dir == SCST_DATA_READ) ?
1981                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
1982         dma_len = sg_dma_len(&scat[0]);
1983         riu = ioctx->rdma_ius;
1984
1985         /*
1986          * For each remote descriptor, compute the number of ib_sge entries
1987          * required. If at most SRPT_DEF_SG_PER_WQE ib_sge entries are needed
1988          *      per RDMA operation, a single rdma_iu (one RDMA work request)
1989          *      per remote descriptor suffices; otherwise an extra rdma_iu is
1990          *      allocated to carry the remaining ib_sge entries in an
1991          *      additional RDMA work request.
1992          */
1993         for (i = 0, j = 0;
1994              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
1995                 rsize = be32_to_cpu(db->len);
1996                 raddr = be64_to_cpu(db->va);
1997                 riu->raddr = raddr;
1998                 riu->rkey = be32_to_cpu(db->key);
1999                 riu->sge_cnt = 0;
2000
2001                 /* Calculate how many SGEs are required for this remote buffer. */
2002                 while (rsize > 0 && tsize > 0) {
2003
2004                         if (rsize >= dma_len) {
2005                                 tsize -= dma_len;
2006                                 rsize -= dma_len;
2007                                 raddr += dma_len;
2008
2009                                 if (tsize > 0) {
2010                                         ++j;
2011                                         if (j < count)
2012                                                 dma_len = sg_dma_len(&scat[j]);
2013                                 }
2014                         } else {
2015                                 tsize -= rsize;
2016                                 dma_len -= rsize;
2017                                 rsize = 0;
2018                         }
2019
2020                         ++riu->sge_cnt;
2021
2022                         if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) {
2023                                 riu->sge =
2024                                     kmalloc(riu->sge_cnt * sizeof *riu->sge,
2025                                             scst_cmd_atomic(scmnd)
2026                                             ? GFP_ATOMIC : GFP_KERNEL);
2027                                 if (!riu->sge)
2028                                         goto free_mem;
2029
2030                                 ++ioctx->n_rdma;
2031                                 ++riu;
2032                                 riu->sge_cnt = 0;
2033                                 riu->raddr = raddr;
2034                                 riu->rkey = be32_to_cpu(db->key);
2035                         }
2036                 }
2037
2038                 riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge,
2039                                    scst_cmd_atomic(scmnd)
2040                                    ? GFP_ATOMIC : GFP_KERNEL);
2041
2042                 if (!riu->sge)
2043                         goto free_mem;
2044
2045                 ++ioctx->n_rdma;
2046         }
2047
2048         db = ioctx->rbufs;
2049         scat = scst_cmd_get_sg(scmnd);
2050         tsize = (dir == SCST_DATA_READ) ?
2051                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
2052         riu = ioctx->rdma_ius;
2053         dma_len = sg_dma_len(&scat[0]);
2054         dma_addr = sg_dma_address(&scat[0]);
2055
2056         /* The second pass maps the scatterlist DMA addresses to the rdma_iu->ib_sge entries. */
2057         for (i = 0, j = 0;
2058              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
2059                 rsize = be32_to_cpu(db->len);
2060                 sge = riu->sge;
2061                 k = 0;
2062
2063                 while (rsize > 0 && tsize > 0) {
2064                         sge->addr = dma_addr;
2065                         sge->lkey = ch->sport->sdev->mr->lkey;
2066
2067                         if (rsize >= dma_len) {
2068                                 sge->length =
2069                                         (tsize < dma_len) ? tsize : dma_len;
2070                                 tsize -= dma_len;
2071                                 rsize -= dma_len;
2072
2073                                 if (tsize > 0) {
2074                                         ++j;
2075                                         if (j < count) {
2076                                                 dma_len = sg_dma_len(&scat[j]);
2077                                                 dma_addr =
2078                                                     sg_dma_address(&scat[j]);
2079                                         }
2080                                 }
2081                         } else {
2082                                 sge->length = (tsize < rsize) ? tsize : rsize;
2083                                 tsize -= rsize;
2084                                 dma_len -= rsize;
2085                                 dma_addr += rsize;
2086                                 rsize = 0;
2087                         }
2088
2089                         ++k;
2090                         if (k == riu->sge_cnt && rsize > 0) {
2091                                 ++riu;
2092                                 sge = riu->sge;
2093                                 k = 0;
2094                         } else if (rsize > 0)
2095                                 ++sge;
2096                 }
2097         }
2098
2099         return 0;
2100
2101 free_mem:
2102         while (ioctx->n_rdma)
2103                 kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge);
2104
2105         kfree(ioctx->rdma_ius);
2106
2107         dma_unmap_sg(ch->sport->sdev->device->dma_device,
2108                      scat, scst_cmd_get_sg_cnt(scmnd),
2109                      scst_to_tgt_dma_dir(dir));
2110
2111         return -ENOMEM;
2112 }
2113
2114 static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
2115                               scst_data_direction dir)
2116 {
2117         struct ib_send_wr wr;
2118         struct ib_send_wr *bad_wr;
2119         struct rdma_iu *riu;
2120         int i;
2121         int ret = 0;
2122
2123         riu = ioctx->rdma_ius;
2124         memset(&wr, 0, sizeof wr);
2125
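	/*
	 * Post one RDMA work request per rdma_iu. wr_id is set to the ioctx
	 * index so that the completion handler can look the context up in the
	 * ioctx ring.
	 */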
2126         for (i = 0; i < ioctx->n_rdma; ++i, ++riu) {
2127                 wr.opcode = (dir == SCST_DATA_READ) ?
2128                     IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
2129                 wr.next = NULL;
2130                 wr.wr_id = ioctx->index;
2131                 wr.wr.rdma.remote_addr = riu->raddr;
2132                 wr.wr.rdma.rkey = riu->rkey;
2133                 wr.num_sge = riu->sge_cnt;
2134                 wr.sg_list = riu->sge;
2135
2136                 /* Request a completion event only for the last RDMA work request, and only in the write direction. */
2137                 if (i == (ioctx->n_rdma - 1) && dir == SCST_DATA_WRITE)
2138                         wr.send_flags = IB_SEND_SIGNALED;
2139
2140                 ret = ib_post_send(ch->qp, &wr, &bad_wr);
2141                 if (ret)
2142                         break;
2143         }
2144
2145         return ret;
2146 }
2147
2148 /*
2149  * Start a data transfer to or from the initiator. Must not block.
2150  */
2151 static int srpt_xfer_data(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
2152                           struct scst_cmd *scmnd)
2153 {
2154         int ret;
2155
2156         ret = srpt_map_sg_to_ib_sge(ch, ioctx, scmnd);
2157         if (ret) {
2158                 printk(KERN_ERR PFX "%s[%d] ret=%d\n", __func__, __LINE__, ret);
2159                 ret = SCST_TGT_RES_QUEUE_FULL;
2160                 goto out;
2161         }
2162
2163         ret = srpt_perform_rdmas(ch, ioctx, scst_cmd_get_data_direction(scmnd));
2164         if (ret) {
2165                 printk(KERN_ERR PFX "%s[%d] ret=%d\n", __func__, __LINE__, ret);
2166                 if (ret == -EAGAIN || ret == -ENOMEM)
2167                         ret = SCST_TGT_RES_QUEUE_FULL;
2168                 else
2169                         ret = SCST_TGT_RES_FATAL_ERROR;
2170                 goto out;
2171         }
2172
2173         ret = SCST_TGT_RES_SUCCESS;
2174
2175 out:
2176         return ret;
2177 }
2178
2179 /*
2180  * Called by the SCST core to inform ib_srpt that data reception should start.
2181  * Must not block.
2182  */
2183 static int srpt_rdy_to_xfer(struct scst_cmd *scmnd)
2184 {
2185         struct srpt_rdma_ch *ch;
2186         struct srpt_ioctx *ioctx;
2187
2188         ioctx = scst_cmd_get_tgt_priv(scmnd);
2189         BUG_ON(!ioctx);
2190
2191         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2192         BUG_ON(!ch);
2193
2194         if (ch->state == RDMA_CHANNEL_DISCONNECTING)
2195                 return SCST_TGT_RES_FATAL_ERROR;
2196         else if (ch->state == RDMA_CHANNEL_CONNECTING)
2197                 return SCST_TGT_RES_QUEUE_FULL;
2198
2199         ioctx->state = SRPT_STATE_NEED_DATA;
2200
2201         return srpt_xfer_data(ch, ioctx, scmnd);
2202 }
2203
2204 /*
2205  * Called by the SCST core. Transmits the response buffer and status held in
2206  * 'scmnd'. Must not block.
2207  */
2208 static int srpt_xmit_response(struct scst_cmd *scmnd)
2209 {
2210         struct srpt_rdma_ch *ch;
2211         struct srpt_ioctx *ioctx;
2212         struct srp_rsp *srp_rsp;
2213         u64 tag;
2214         int ret = SCST_TGT_RES_SUCCESS;
2215         int dir;
2216         int status;
2217
2218         ioctx = scst_cmd_get_tgt_priv(scmnd);
2219         BUG_ON(!ioctx);
2220
2221         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2222         BUG_ON(!ch);
2223
2224         tag = scst_cmd_get_tag(scmnd);
2225
2226         if (ch->state != RDMA_CHANNEL_LIVE) {
2227                 printk(KERN_ERR PFX
2228                        "%s: tag= %lld channel in bad state %d\n",
2229                        __func__, (unsigned long long)tag, ch->state);
2230
2231                 if (ch->state == RDMA_CHANNEL_DISCONNECTING)
2232                         ret = SCST_TGT_RES_FATAL_ERROR;
2233                 else if (ch->state == RDMA_CHANNEL_CONNECTING)
2234                         ret = SCST_TGT_RES_QUEUE_FULL;
2235
2236                 if (unlikely(scst_cmd_aborted(scmnd)))
2237                         goto out_aborted;
2238
2239                 goto out;
2240         }
2241
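	/* Give the CPU ownership of the I/O context buffer before building the SRP response in it. */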
2242         dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
2243                                 MAX_MESSAGE_SIZE, DMA_TO_DEVICE);
2244
2245         srp_rsp = ioctx->buf;
2246
2247         if (unlikely(scst_cmd_aborted(scmnd))) {
2248                 printk(KERN_ERR PFX
2249                        "%s: tag= %lld already get aborted\n",
2250                        __func__, (unsigned long long)tag);
2251                 goto out_aborted;
2252         }
2253
2254         dir = scst_cmd_get_data_direction(scmnd);
2255         status = scst_cmd_get_status(scmnd) & 0xff;
2256
2257         srpt_build_cmd_rsp(ch, ioctx, NO_SENSE, NO_ADD_SENSE, tag);
2258
2259         if (SCST_SENSE_VALID(scst_cmd_get_sense_buffer(scmnd))) {
2260                 srp_rsp->sense_data_len = scst_cmd_get_sense_buffer_len(scmnd);
2261                 if (srp_rsp->sense_data_len >
2262                     (MAX_MESSAGE_SIZE - sizeof *srp_rsp))
2263                         srp_rsp->sense_data_len =
2264                             MAX_MESSAGE_SIZE - sizeof *srp_rsp;
2265
2266                 memcpy((u8 *) (srp_rsp + 1), scst_cmd_get_sense_buffer(scmnd),
2267                        srp_rsp->sense_data_len);
2268
2269                 srp_rsp->sense_data_len = cpu_to_be32(srp_rsp->sense_data_len);
2270                 srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
2271
2272                 if (!status)
2273                         status = SAM_STAT_CHECK_CONDITION;
2274         }
2275
2276         srp_rsp->status = status;
2277
2278         /* transfer read data if any */
2279         if (dir == SCST_DATA_READ && scst_cmd_get_resp_data_len(scmnd)) {
2280                 ret = srpt_xfer_data(ch, ioctx, scmnd);
2281                 if (ret != SCST_TGT_RES_SUCCESS) {
2282                         printk(KERN_ERR PFX
2283                                "%s: tag= %lld xfer_data failed\n",
2284                                __func__, (unsigned long long)tag);
2285                         goto out;
2286                 }
2287         }
2288
2289         ioctx->state = SRPT_STATE_PROCESSED;
2290
2291         if (srpt_post_send(ch, ioctx,
2292                            sizeof *srp_rsp +
2293                            be32_to_cpu(srp_rsp->sense_data_len))) {
2294                 printk(KERN_ERR PFX "%s: ch->state= %d tag= %lld\n",
2295                        __func__, ch->state,
2296                        (unsigned long long)tag);
2297                 ret = SCST_TGT_RES_FATAL_ERROR;
2298         }
2299
2300 out:
2301         return ret;
2302
2303 out_aborted:
2304         ret = SCST_TGT_RES_SUCCESS;
2305         scst_set_delivery_status(scmnd, SCST_CMD_DELIVERY_ABORTED);
2306         ioctx->state = SRPT_STATE_ABORTED;
2307         scst_tgt_cmd_done(scmnd, SCST_CONTEXT_SAME);
2308         goto out;
2309 }
2310
2311 /*
2312  * Called by the SCST core to inform ib_srpt that a received task management
2313  * function has been completed. Must not block.
2314  */
2315 static void srpt_tsk_mgmt_done(struct scst_mgmt_cmd *mcmnd)
2316 {
2317         struct srpt_rdma_ch *ch;
2318         struct srpt_mgmt_ioctx *mgmt_ioctx;
2319         struct srpt_ioctx *ioctx;
2320         int rsp_len;
2321
2322         mgmt_ioctx = scst_mgmt_cmd_get_tgt_priv(mcmnd);
2323         BUG_ON(!mgmt_ioctx);
2324
2325         ch = mgmt_ioctx->ch;
2326         BUG_ON(!ch);
2327
2328         ioctx = mgmt_ioctx->ioctx;
2329         BUG_ON(!ioctx);
2330
2331         TRACE_DBG("%s: tsk_mgmt_done for tag= %lld status=%d\n",
2332                   __func__, (unsigned long long)mgmt_ioctx->tag,
2333                   scst_mgmt_cmd_get_status(mcmnd));
2334
2335         rsp_len = srpt_build_tskmgmt_rsp(ch, ioctx,
2336                                          (scst_mgmt_cmd_get_status(mcmnd) ==
2337                                           SCST_MGMT_STATUS_SUCCESS) ?
2338                                          SRP_TSK_MGMT_SUCCESS :
2339                                          SRP_TSK_MGMT_FAILED,
2340                                          mgmt_ioctx->tag);
2341         srpt_post_send(ch, ioctx, rsp_len);
2342
2343         scst_mgmt_cmd_set_tgt_priv(mcmnd, NULL);
2344
2345         kfree(mgmt_ioctx);
2346 }
2347
2348 /*
2349  * Called by the SCST core to inform ib_srpt that the command 'scmnd' is about
2350  * to be freed. May be called in IRQ context.
2351  */
2352 static void srpt_on_free_cmd(struct scst_cmd *scmnd)
2353 {
2354         struct srpt_rdma_ch *ch;
2355         struct srpt_ioctx *ioctx;
2356
2357         ioctx = scst_cmd_get_tgt_priv(scmnd);
2358         BUG_ON(!ioctx);
2359
2360         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2361         BUG_ON(!ch);
2362
2363         spin_lock_irq(&ch->spinlock);
2364         list_del(&ioctx->scmnd_list);
2365         ch->active_scmnd_cnt--;
2366         spin_unlock_irq(&ch->spinlock);
2367
2368         srpt_reset_ioctx(ch, ioctx);
2369         scst_cmd_set_tgt_priv(scmnd, NULL);
2370 }
2371
2372 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2373 /* A vanilla 2.6.19 or older kernel without backported OFED kernel headers. */
2374 static void srpt_refresh_port_work(void *ctx)
2375 #else
2376 static void srpt_refresh_port_work(struct work_struct *work)
2377 #endif
2378 {
2379 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2380         struct srpt_port *sport = (struct srpt_port *)ctx;
2381 #else
2382         struct srpt_port *sport = container_of(work, struct srpt_port, work);
2383 #endif
2384
2385         srpt_refresh_port(sport);
2386 }
2387
2388 /*
2389  * Called by the SCST core to detect target adapters. Returns the number of
2390  * detected target adapters.
2391  */
2392 static int srpt_detect(struct scst_tgt_template *tp)
2393 {
2394         int device_count;
2395
2396         TRACE_ENTRY();
2397
2398         device_count = atomic_read(&srpt_device_count);
2399
2400         TRACE_EXIT_RES(device_count);
2401
2402         return device_count;
2403 }
2404
2405 /*
2406  * Callback function called by the SCST core from scst_unregister() to free up
2407  * the resources associated with device scst_tgt.
2408  */
2409 static int srpt_release(struct scst_tgt *scst_tgt)
2410 {
2411         struct srpt_device *sdev = scst_tgt_get_tgt_priv(scst_tgt);
2412         struct srpt_rdma_ch *ch, *tmp_ch;
2413
2414         TRACE_ENTRY();
2415
2416         BUG_ON(!scst_tgt);
2417 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
2418         WARN_ON(!sdev);
2419         if (!sdev)
2420                 return -ENODEV;
2421 #else
2422         if (WARN_ON(!sdev))
2423                 return -ENODEV;
2424 #endif
2425
2426         srpt_unregister_procfs_entry(scst_tgt->tgtt);
2427
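	/*
	 * Release all channels. The spinlock is dropped around each
	 * srpt_release_channel() call because that function may sleep.
	 */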
2428         spin_lock_irq(&sdev->spinlock);
2429         list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
2430                 list_del(&ch->list);
2431                 spin_unlock_irq(&sdev->spinlock);
2432                 srpt_release_channel(ch, 1);
2433                 spin_lock_irq(&sdev->spinlock);
2434         }
2435         spin_unlock_irq(&sdev->spinlock);
2436
2437         srpt_unregister_mad_agent(sdev);
2438
2439         scst_tgt_set_tgt_priv(scst_tgt, NULL);
2440
2441         TRACE_EXIT();
2442
2443         return 0;
2444 }
2445
2446 /*
2447  * Entry point for ib_srpt's kernel thread. This kernel thread is only created
2448  * when the module parameter 'thread' is not zero (the default is zero).
2449  * This thread processes the ioctx list srpt_thread.thread_ioctx_list.
2450  *
2451  * @pre thread != 0
2452  */
2453 static int srpt_ioctx_thread(void *arg)
2454 {
2455         struct srpt_ioctx *ioctx;
2456
2457         /* Hibernation / freezing of the SRPT kernel thread is not supported. */
2458         current->flags |= PF_NOFREEZE;
2459
2460         spin_lock_irq(&srpt_thread.thread_lock);
2461         while (!kthread_should_stop()) {
2462                 wait_queue_t wait;
2463                 init_waitqueue_entry(&wait, current);
2464
2465                 if (!srpt_test_ioctx_list()) {
2466                         add_wait_queue_exclusive(&ioctx_list_waitQ, &wait);
2467
2468                         for (;;) {
2469                                 set_current_state(TASK_INTERRUPTIBLE);
2470                                 if (srpt_test_ioctx_list())
2471                                         break;
2472                                 spin_unlock_irq(&srpt_thread.thread_lock);
2473                                 schedule();
2474                                 spin_lock_irq(&srpt_thread.thread_lock);
2475                         }
2476                         set_current_state(TASK_RUNNING);
2477                         remove_wait_queue(&ioctx_list_waitQ, &wait);
2478                 }
2479
2480                 while (!list_empty(&srpt_thread.thread_ioctx_list)) {
2481                         ioctx = list_entry(srpt_thread.thread_ioctx_list.next,
2482                                            struct srpt_ioctx, comp_list);
2483
2484                         list_del(&ioctx->comp_list);
2485
2486                         spin_unlock_irq(&srpt_thread.thread_lock);
2487                         switch (ioctx->op) {
2488                         case IB_WC_SEND:
2489                                 srpt_handle_send_comp(ioctx->ch, ioctx,
2490                                         SCST_CONTEXT_DIRECT);
2491                                 break;
2492                         case IB_WC_RDMA_WRITE:
2493                         case IB_WC_RDMA_READ:
2494                                 srpt_handle_rdma_comp(ioctx->ch, ioctx);
2495                                 break;
2496                         case IB_WC_RECV:
2497                                 srpt_handle_new_iu(ioctx->ch, ioctx);
2498                                 break;
2499                         default:
2500                                 break;
2501                         }
2502                         spin_lock_irq(&srpt_thread.thread_lock);
2503                 }
2504         }
2505         spin_unlock_irq(&srpt_thread.thread_lock);
2506
2507         return 0;
2508 }
2509
2510 /* SCST target template for the SRP target implementation. */
2511 static struct scst_tgt_template srpt_template = {
2512         .name = DRV_NAME,
2513         .sg_tablesize = SRPT_DEF_SG_TABLESIZE,
2514         .xmit_response_atomic = 1,
2515         .rdy_to_xfer_atomic = 1,
2516         .no_proc_entry = 0,
2517         .detect = srpt_detect,
2518         .release = srpt_release,
2519         .xmit_response = srpt_xmit_response,
2520         .rdy_to_xfer = srpt_rdy_to_xfer,
2521         .on_free_cmd = srpt_on_free_cmd,
2522         .task_mgmt_fn_done = srpt_tsk_mgmt_done
2523 };
2524
2525 /*
2526  * The callback function srpt_release_class_dev() is called whenever a
2527  * device is removed from the /sys/class/infiniband_srpt device class.
2528  */
2529 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2530 static void srpt_release_class_dev(struct class_device *class_dev)
2531 #else
2532 static void srpt_release_class_dev(struct device *dev)
2533 #endif
2534 {
2535 }
2536
2537 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
2538 static int srpt_trace_level_show(struct seq_file *seq, void *v)
2539 {
2540         return scst_proc_log_entry_read(seq, trace_flag, NULL);
2541 }
2542
2543 static ssize_t srpt_proc_trace_level_write(struct file *file,
2544         const char __user *buf, size_t length, loff_t *off)
2545 {
2546         return scst_proc_log_entry_write(file, buf, length, &trace_flag,
2547                 DEFAULT_SRPT_TRACE_FLAGS, NULL);
2548 }
2549
2550 static struct scst_proc_data srpt_log_proc_data = {
2551         SCST_DEF_RW_SEQ_OP(srpt_proc_trace_level_write)
2552         .show = srpt_trace_level_show,
2553 };
2554 #endif
2555
2556 static struct class_attribute srpt_class_attrs[] = {
2557         __ATTR_NULL,
2558 };
2559
2560 static struct class srpt_class = {
2561         .name = "infiniband_srpt",
2562 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2563         .release = srpt_release_class_dev,
2564 #else
2565         .dev_release = srpt_release_class_dev,
2566 #endif
2567         .class_attrs = srpt_class_attrs,
2568 };
2569
2570 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2571 static ssize_t show_login_info(struct class_device *class_dev, char *buf)
2572 #else
2573 static ssize_t show_login_info(struct device *dev,
2574                                struct device_attribute *attr, char *buf)
2575 #endif
2576 {
2577         struct srpt_device *sdev =
2578 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2579                 container_of(class_dev, struct srpt_device, class_dev);
2580 #else
2581                 container_of(dev, struct srpt_device, dev);
2582 #endif
2583         struct srpt_port *sport;
2584         int i;
2585         int len = 0;
2586
2587         for (i = 0; i < sdev->device->phys_port_cnt; i++) {
2588                 sport = &sdev->port[i];
2589
2590                 len += sprintf(buf + len,
2591                                "tid_ext=%016llx,ioc_guid=%016llx,pkey=ffff,"
2592                                "dgid=%04x%04x%04x%04x%04x%04x%04x%04x,"
2593                                "service_id=%016llx\n",
2594                                (unsigned long long) mellanox_ioc_guid,
2595                                (unsigned long long) mellanox_ioc_guid,
2596                                be16_to_cpu(((__be16 *) sport->gid.raw)[0]),
2597                                be16_to_cpu(((__be16 *) sport->gid.raw)[1]),
2598                                be16_to_cpu(((__be16 *) sport->gid.raw)[2]),
2599                                be16_to_cpu(((__be16 *) sport->gid.raw)[3]),
2600                                be16_to_cpu(((__be16 *) sport->gid.raw)[4]),
2601                                be16_to_cpu(((__be16 *) sport->gid.raw)[5]),
2602                                be16_to_cpu(((__be16 *) sport->gid.raw)[6]),
2603                                be16_to_cpu(((__be16 *) sport->gid.raw)[7]),
2604                                (unsigned long long) mellanox_ioc_guid);
2605         }
2606
2607         return len;
2608 }
2609
2610 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2611 static CLASS_DEVICE_ATTR(login_info, S_IRUGO, show_login_info, NULL);
2612 #else
2613 static DEVICE_ATTR(login_info, S_IRUGO, show_login_info, NULL);
2614 #endif
2615
2616 /*
2617  * Callback function called by the InfiniBand core when either an InfiniBand
2618  * device has been added or during the ib_register_client() call for each
2619  * registered InfiniBand device.
2620  */
2621 static void srpt_add_one(struct ib_device *device)
2622 {
2623         struct srpt_device *sdev;
2624         struct srpt_port *sport;
2625         struct ib_srq_init_attr srq_attr;
2626         int i;
2627
2628         TRACE_ENTRY();
2629
2630         sdev = kzalloc(sizeof *sdev, GFP_KERNEL);
2631         if (!sdev)
2632                 return;
2633
2634         sdev->device = device;
2635
2636 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2637         sdev->class_dev.class = &srpt_class;
2638         sdev->class_dev.dev = device->dma_device;
2639         snprintf(sdev->class_dev.class_id, BUS_ID_SIZE,
2640                  "srpt-%s", device->name);
2641 #else
2642         sdev->dev.class = &srpt_class;
2643         sdev->dev.parent = device->dma_device;
2644 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30)
2645         snprintf(sdev->dev.bus_id, BUS_ID_SIZE, "srpt-%s", device->name);
2646 #else
2647         snprintf(sdev->init_name, sizeof(sdev->init_name),
2648                  "srpt-%s", device->name);
2649         sdev->dev.init_name = sdev->init_name;
2650 #endif
2651 #endif
2652
2653 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2654         if (class_device_register(&sdev->class_dev))
2655                 goto free_dev;
2656         if (class_device_create_file(&sdev->class_dev,
2657                                      &class_device_attr_login_info))
2658                 goto err_dev;
2659 #else
2660         if (device_register(&sdev->dev))
2661                 goto free_dev;
2662         if (device_create_file(&sdev->dev, &dev_attr_login_info))
2663                 goto err_dev;
2664 #endif
2665
2666         if (ib_query_device(device, &sdev->dev_attr))
2667                 goto err_dev;
2668
2669         sdev->pd = ib_alloc_pd(device);
2670         if (IS_ERR(sdev->pd))
2671                 goto err_dev;
2672
2673         sdev->mr = ib_get_dma_mr(sdev->pd, IB_ACCESS_LOCAL_WRITE);
2674         if (IS_ERR(sdev->mr))
2675                 goto err_pd;
2676
2677         srq_attr.event_handler = srpt_srq_event;
2678         srq_attr.srq_context = (void *)sdev;
2679         srq_attr.attr.max_wr = min(SRPT_SRQ_SIZE, sdev->dev_attr.max_srq_wr);
2680         srq_attr.attr.max_sge = 1;
2681         srq_attr.attr.srq_limit = 0;
2682
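	/*
	 * A single SRQ is shared by all RDMA channels on this HCA; the receive
	 * buffers of the ioctx ring are posted to it further down.
	 */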
2683         sdev->srq = ib_create_srq(sdev->pd, &srq_attr);
2684         if (IS_ERR(sdev->srq))
2685                 goto err_mr;
2686
2687         TRACE_DBG("%s: create SRQ #wr= %d max_allow=%d dev= %s",
2688                __func__, srq_attr.attr.max_wr,
2689               sdev->dev_attr.max_srq_wr, device->name);
2690
2691         if (!mellanox_ioc_guid)
2692                 mellanox_ioc_guid = be64_to_cpu(device->node_guid);
2693
2694         sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev);
2695         if (IS_ERR(sdev->cm_id))
2696                 goto err_srq;
2697
2698         /* print out target login information */
2699         TRACE_DBG("Target login info: id_ext=%016llx,"
2700                   "ioc_guid=%016llx,pkey=ffff,service_id=%016llx",
2701                   (unsigned long long) mellanox_ioc_guid,
2702                   (unsigned long long) mellanox_ioc_guid,
2703                   (unsigned long long) mellanox_ioc_guid);
2704
2705         /*
2706          * We do not have a consistent service_id (i.e. also the id_ext of the
2707          * target_id) to identify this target. We currently use the GUID of the
2708          * first HCA in the system as the service_id; therefore the target_id
2709          * will change if this HCA goes bad and is replaced by a different HCA.
2710          */
2711         if (ib_cm_listen(sdev->cm_id, cpu_to_be64(mellanox_ioc_guid), 0, NULL))
2712                 goto err_cm;
2713
2714         INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device,
2715                               srpt_event_handler);
2716         if (ib_register_event_handler(&sdev->event_handler))
2717                 goto err_cm;
2718
2719         if (srpt_alloc_ioctx_ring(sdev))
2720                 goto err_event;
2721
2722         INIT_LIST_HEAD(&sdev->rch_list);
2723         spin_lock_init(&sdev->spinlock);
2724
2725         for (i = 0; i < SRPT_SRQ_SIZE; ++i)
2726                 srpt_post_recv(sdev, sdev->ioctx_ring[i]);
2727
2728         ib_set_client_data(device, &srpt_client, sdev);
2729
2730         sdev->scst_tgt = scst_register(&srpt_template, NULL);
2731         if (!sdev->scst_tgt) {
2732                 printk(KERN_ERR PFX "SCST registration failed for %s.\n",
2733                         sdev->device->name);
2734                 goto err_ring;
2735         }
2736
2737         scst_tgt_set_tgt_priv(sdev->scst_tgt, sdev);
2738
2739         for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
2740                 sport = &sdev->port[i - 1];
2741                 sport->sdev = sdev;
2742                 sport->port = i;
2743 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2744                 /*
2745                  * A vanilla 2.6.19 or older kernel without backported OFED
2746                  * kernel headers.
2747                  */
2748                 INIT_WORK(&sport->work, srpt_refresh_port_work, sport);
2749 #else
2750                 INIT_WORK(&sport->work, srpt_refresh_port_work);
2751 #endif
2752                 if (srpt_refresh_port(sport)) {
2753                         printk(KERN_ERR PFX "MAD registration failed"
2754                                " for %s-%d.\n", sdev->device->name, i);
2755                         goto err_refresh_port;
2756                 }
2757         }
2758
2759         atomic_inc(&srpt_device_count);
2760
2761         TRACE_EXIT();
2762
2763         return;
2764
2765 err_refresh_port:
2766         scst_unregister(sdev->scst_tgt);
2767 err_ring:
2768         ib_set_client_data(device, &srpt_client, NULL);
2769         srpt_free_ioctx_ring(sdev);
2770 err_event:
2771         ib_unregister_event_handler(&sdev->event_handler);
2772 err_cm:
2773         ib_destroy_cm_id(sdev->cm_id);
2774 err_srq:
2775         ib_destroy_srq(sdev->srq);
2776 err_mr:
2777         ib_dereg_mr(sdev->mr);
2778 err_pd:
2779         ib_dealloc_pd(sdev->pd);
2780 err_dev:
2781 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2782         class_device_unregister(&sdev->class_dev);
2783 #else
2784         device_unregister(&sdev->dev);
2785 #endif
2786 free_dev:
2787         kfree(sdev);
2788
2789         TRACE_EXIT();
2790 }
2791
2792 /*
2793  * Callback function called by the InfiniBand core when either an InfiniBand
2794  * device has been removed or during the ib_unregister_client() call for each
2795  * registered InfiniBand device.
2796  */
2797 static void srpt_remove_one(struct ib_device *device)
2798 {
2799         int i;
2800         struct srpt_device *sdev;
2801
2802         TRACE_ENTRY();
2803
2804         sdev = ib_get_client_data(device, &srpt_client);
2805 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
2806         WARN_ON(!sdev);
2807         if (!sdev)
2808                 return;
2809 #else
2810         if (WARN_ON(!sdev))
2811                 return;
2812 #endif
2813
2814         /*
2815          * Cancel the work if it is queued. Wait until srpt_refresh_port_work()
2816          * finished if it is running.
2817          */
2818         for (i = 0; i < sdev->device->phys_port_cnt; i++)
2819 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 22)
2820                 cancel_work_sync(&sdev->port[i].work);
2821 #else
2822                 /*
2823                  * cancel_work_sync() was introduced in kernel 2.6.22. Older
2824                  * kernels do not have a facility to cancel scheduled work.
2825                  */
2826                 printk(KERN_ERR PFX
2827                        "your kernel does not provide cancel_work_sync().\n");
2828 #endif
2829
2830         scst_unregister(sdev->scst_tgt);
2831         sdev->scst_tgt = NULL;
2832
2833         ib_unregister_event_handler(&sdev->event_handler);
2834         ib_destroy_cm_id(sdev->cm_id);
2835         ib_destroy_srq(sdev->srq);
2836         ib_dereg_mr(sdev->mr);
2837         ib_dealloc_pd(sdev->pd);
2838 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2839         class_device_unregister(&sdev->class_dev);
2840 #else
2841         device_unregister(&sdev->dev);
2842 #endif
2843
2844         srpt_free_ioctx_ring(sdev);
2845         kfree(sdev);
2846
2847         TRACE_EXIT();
2848 }
2849
2850 /**
2851  * Create procfs entries for srpt. Currently the only procfs entry created
2852  * by this function is the "trace_level" entry.
2853  */
2854 static int srpt_register_procfs_entry(struct scst_tgt_template *tgt)
2855 {
2856         int res = 0;
2857 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
2858         struct proc_dir_entry *p, *root;
2859
2860         root = scst_proc_get_tgt_root(tgt);
2861         WARN_ON(!root);
2862         if (root) {
2863                 /*
2864                  * Fill in the scst_proc_data::data pointer, which is used in
2865                  * a printk(KERN_INFO ...) statement in
2866                  * scst_proc_log_entry_write() in scst_proc.c.
2867                  */
2868                 srpt_log_proc_data.data = (char *)tgt->name;
2869                 p = scst_create_proc_entry(root, SRPT_PROC_TRACE_LEVEL_NAME,
2870                                            &srpt_log_proc_data);
2871                 if (!p)
2872                         res = -ENOMEM;
2873         } else
2874                 res = -ENOMEM;
2875
2876 #endif
2877         return res;
2878 }
2879
2880 static void srpt_unregister_procfs_entry(struct scst_tgt_template *tgt)
2881 {
2882 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
2883         struct proc_dir_entry *root;
2884
2885         root = scst_proc_get_tgt_root(tgt);
2886         WARN_ON(!root);
2887         if (root)
2888                 remove_proc_entry(SRPT_PROC_TRACE_LEVEL_NAME, root);
2889 #endif
2890 }
2891
2892 /*
2893  * Module initialization.
2894  *
2895  * Note: since ib_register_client() registers callback functions, and since at
2896  * least one of these callback functions (srpt_add_one()) calls SCST functions,
2897  * the SCST target template must be registered before ib_register_client() is
2898  * called.
2899  */
2900 static int __init srpt_init_module(void)
2901 {
2902         int ret;
2903
2904         ret = class_register(&srpt_class);
2905         if (ret) {
2906                 printk(KERN_ERR PFX "couldn't register class ib_srpt\n");
2907                 goto out;
2908         }
2909
2910         ret = scst_register_target_template(&srpt_template);
2911         if (ret < 0) {
2912                 printk(KERN_ERR PFX "couldn't register with scst\n");
2913                 ret = -ENODEV;
2914                 goto out_unregister_class;
2915         }
2916
2917         ret = srpt_register_procfs_entry(&srpt_template);
2918         if (ret) {
2919                 printk(KERN_ERR PFX "couldn't register procfs entry\n");
2920                 goto out_unregister_target;
2921         }
2922
2923         ret = ib_register_client(&srpt_client);
2924         if (ret) {
2925                 printk(KERN_ERR PFX "couldn't register IB client\n");
2926                 goto out_unregister_target;
2927         }
2928
2929         if (thread) {
2930                 spin_lock_init(&srpt_thread.thread_lock);
2931                 INIT_LIST_HEAD(&srpt_thread.thread_ioctx_list);
2932                 srpt_thread.thread = kthread_run(srpt_ioctx_thread,
2933                                                  NULL, "srpt_thread");
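		/*
		 * If the thread cannot be started, fall back to processing
		 * completions directly in the completion handler.
		 */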
2934                 if (IS_ERR(srpt_thread.thread)) {
2935                         srpt_thread.thread = NULL;
2936                         thread = 0;
2937                 }
2938         }
2939
2940         return 0;
2941
2942 out_unregister_target:
2943         /*
2944          * Note: the procfs entry is unregistered in srpt_release(), which is
2945          * called by scst_unregister_target_template().
2946          */
2947         scst_unregister_target_template(&srpt_template);
2948 out_unregister_class:
2949         class_unregister(&srpt_class);
2950 out:
2951         return ret;
2952 }
2953
2954 static void __exit srpt_cleanup_module(void)
2955 {
2956         TRACE_ENTRY();
2957
2958         if (srpt_thread.thread)
2959                 kthread_stop(srpt_thread.thread);
2960         ib_unregister_client(&srpt_client);
2961         scst_unregister_target_template(&srpt_template);
2962         class_unregister(&srpt_class);
2963
2964         TRACE_EXIT();
2965 }
2966
2967 module_init(srpt_init_module);
2968 module_exit(srpt_cleanup_module);