/*
 * Copyright (c) 2006 - 2009 Mellanox Technology Inc.  All rights reserved.
 * Copyright (C) 2008 Vladislav Bolkhovitin <vst@vlnb.net>
 * Copyright (C) 2008 - 2009 Bart Van Assche <bart.vanassche@gmail.com>
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#include <asm/atomic.h>
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#endif
#include "ib_srpt.h"
#include "scst_debug.h"

#define CONFIG_SCST_PROC

/* Name of this kernel module. */
#define DRV_NAME                "ib_srpt"
/* Prefix for printk() kernel messages. */
#define LOG_PFX                 DRV_NAME ": "
#define DRV_VERSION             "1.0.1"
#define DRV_RELDATE             "July 10, 2008"
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
/* Flags to be used in SCST debug tracing statements. */
#define DEFAULT_SRPT_TRACE_FLAGS (TRACE_OUT_OF_MEM | TRACE_MINOR \
                                  | TRACE_MGMT | TRACE_SPECIAL)
/* Name of the entry that will be created under /proc/scsi_tgt/ib_srpt. */
#define SRPT_PROC_TRACE_LEVEL_NAME      "trace_level"
#endif

#define MELLANOX_SRPT_ID_STRING "SCST SRP target"

MODULE_AUTHOR("Vu Pham");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol target "
                   "v" DRV_VERSION " (" DRV_RELDATE ")");
MODULE_LICENSE("Dual BSD/GPL");

struct srpt_thread {
        /* Protects thread_ioctx_list. */
        spinlock_t thread_lock;
        /* I/O contexts to be processed by the kernel thread. */
        struct list_head thread_ioctx_list;
        /* SRPT kernel thread. */
        struct task_struct *thread;
};

/*
 * Global Variables
 */

static u64 srpt_service_guid;
/* Number of srpt_device structures. */
static atomic_t srpt_device_count;
static int use_port_guid_in_session_name;
static int thread = 1;
static struct srpt_thread srpt_thread;
static DECLARE_WAIT_QUEUE_HEAD(ioctx_list_waitQ);
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
static unsigned long trace_flag = DEFAULT_SRPT_TRACE_FLAGS;
module_param(trace_flag, long, 0644);
MODULE_PARM_DESC(trace_flag,
                 "Trace flags for the ib_srpt kernel module.");
#endif
#if defined(CONFIG_SCST_DEBUG)
static unsigned long interrupt_processing_delay_in_us;
module_param(interrupt_processing_delay_in_us, long, 0744);
MODULE_PARM_DESC(interrupt_processing_delay_in_us,
                 "CQ completion handler interrupt delay in microseconds.");
static unsigned long thread_processing_delay_in_us;
module_param(thread_processing_delay_in_us, long, 0744);
MODULE_PARM_DESC(thread_processing_delay_in_us,
                 "SRP thread processing delay in microseconds.");
#endif

module_param(thread, int, 0444);
MODULE_PARM_DESC(thread,
                 "Execute ioctx in thread context. Default 1; if 0, process "
                 "completions in soft IRQ context where possible.");

static unsigned int srp_max_rdma_size = DEFAULT_MAX_RDMA_SIZE;
module_param(srp_max_rdma_size, int, 0744);
MODULE_PARM_DESC(srp_max_rdma_size,
                 "Maximum size of SRP RDMA transfers for new connections.");

static unsigned int srp_max_message_size = DEFAULT_MAX_MESSAGE_SIZE;
module_param(srp_max_message_size, int, 0444);
MODULE_PARM_DESC(srp_max_message_size,
                 "Maximum size of SRP control messages in bytes.");

module_param(use_port_guid_in_session_name, bool, 0444);
MODULE_PARM_DESC(use_port_guid_in_session_name,
                 "Use target port ID in the SCST session name such that"
                 " redundant paths between multiport systems can be masked.");
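
/*
 * Example (hypothetical values, for illustration only): the parameters
 * above can be set when the module is loaded, e.g.
 *
 *   modprobe ib_srpt thread=0 srp_max_rdma_size=65536 \
 *            srp_max_message_size=996 use_port_guid_in_session_name=1
 */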

static void srpt_add_one(struct ib_device *device);
static void srpt_remove_one(struct ib_device *device);
static void srpt_unregister_mad_agent(struct srpt_device *sdev);
#ifdef CONFIG_SCST_PROC
static void srpt_unregister_procfs_entry(struct scst_tgt_template *tgt);
#endif /*CONFIG_SCST_PROC*/
static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
                                    struct srpt_ioctx *ioctx);
static void srpt_release_channel(struct scst_session *scst_sess);

static struct ib_client srpt_client = {
        .name = DRV_NAME,
        .add = srpt_add_one,
        .remove = srpt_remove_one
};

/**
 * Atomically test and set the channel state.
 * @ch: RDMA channel.
 * @old: channel state to compare with.
 * @new: state to change the channel state to if the current state matches the
 *       argument 'old'.
 *
 * Returns the previous channel state.
 */
static enum rdma_ch_state
srpt_test_and_set_channel_state(struct srpt_rdma_ch *ch,
                                enum rdma_ch_state old,
                                enum rdma_ch_state new)
{
        return atomic_cmpxchg(&ch->state, old, new);
}
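
/*
 * Usage example: for the LIVE -> DISCONNECTING transition performed in
 * srpt_qp_event(), only the caller that actually changed the state sees
 * RDMA_CHANNEL_LIVE as the return value; a concurrent caller sees
 * RDMA_CHANNEL_DISCONNECTING instead and can skip sending a duplicate
 * DREQ.
 */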

/*
 * Callback function called by the InfiniBand core when an asynchronous IB
 * event occurs. This callback may occur in interrupt context. See also
 * section 11.5.2, Set Asynchronous Event Handler in the InfiniBand
 * Architecture Specification.
 */
static void srpt_event_handler(struct ib_event_handler *handler,
                               struct ib_event *event)
{
        struct srpt_device *sdev;
        struct srpt_port *sport;

        TRACE_ENTRY();

        sdev = ib_get_client_data(event->device, &srpt_client);
        if (!sdev || sdev->device != event->device)
                return;

        TRACE_DBG("ASYNC event= %d on device= %s",
                  event->event, sdev->device->name);

        switch (event->event) {
        case IB_EVENT_PORT_ERR:
                if (event->element.port_num <= sdev->device->phys_port_cnt) {
                        sport = &sdev->port[event->element.port_num - 1];
                        sport->lid = 0;
                        sport->sm_lid = 0;
                }
                break;
        case IB_EVENT_PORT_ACTIVE:
        case IB_EVENT_LID_CHANGE:
        case IB_EVENT_PKEY_CHANGE:
        case IB_EVENT_SM_CHANGE:
        case IB_EVENT_CLIENT_REREGISTER:
                /*
                 * Refresh port data asynchronously. Note: it is safe to call
                 * schedule_work() even if &sport->work is already on the
                 * global workqueue because schedule_work() tests for the
                 * work_pending() condition before adding &sport->work to the
                 * global work queue.
                 */
                if (event->element.port_num <= sdev->device->phys_port_cnt) {
                        sport = &sdev->port[event->element.port_num - 1];
                        if (!sport->lid && !sport->sm_lid)
                                schedule_work(&sport->work);
                }
                break;
        default:
                break;
        }

        TRACE_EXIT();
}

/*
 * Callback function called by the InfiniBand core for SRQ (shared receive
 * queue) events.
 */
static void srpt_srq_event(struct ib_event *event, void *ctx)
{
        TRACE_ENTRY();

        TRACE_DBG("SRQ event %d", event->event);

        TRACE_EXIT();
}

/*
 * Callback function called by the InfiniBand core for QP (queue pair) events.
 */
static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch)
{
        TRACE_DBG("QP event %d on cm_id=%p sess_name=%s state=%d",
                  event->event, ch->cm_id, ch->sess_name,
                  atomic_read(&ch->state));

        switch (event->event) {
        case IB_EVENT_COMM_EST:
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20) || defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
                ib_cm_notify(ch->cm_id, event->event);
#else
                /* Vanilla 2.6.19 kernel (or before) without OFED. */
                PRINT_ERROR("%s", "ib_cm_notify() is not available on"
                            " vanilla kernels older than 2.6.20.");
#endif
                break;
        case IB_EVENT_QP_LAST_WQE_REACHED:
                if (srpt_test_and_set_channel_state(ch, RDMA_CHANNEL_LIVE,
                        RDMA_CHANNEL_DISCONNECTING) == RDMA_CHANNEL_LIVE) {
                        PRINT_INFO("disconnected session %s.", ch->sess_name);
                        ib_send_cm_dreq(ch->cm_id, NULL, 0);
                }
                break;
        default:
                break;
        }
}

/*
 * Helper function for filling in an InfiniBand IOUnitInfo structure. Copies
 * the lowest four bits of value in element slot of the array of four bit
 * elements called c_list (controller list). The index slot is one-based.
 *
 * @pre 1 <= slot && 0 <= value && value < 16
 */
static void srpt_set_ioc(u8 *c_list, u32 slot, u8 value)
{
        u16 id;
        u8 tmp;

        id = (slot - 1) / 2;
        if (slot & 0x1) {
                tmp = c_list[id] & 0xf;
                c_list[id] = (value << 4) | tmp;
        } else {
                tmp = c_list[id] & 0xf0;
                c_list[id] = (value & 0xf) | tmp;
        }
}
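
/*
 * Illustration of the nibble packing performed by srpt_set_ioc()
 * (hypothetical values, assuming c_list[] was zero-initialized):
 *
 *   srpt_set_ioc(c_list, 1, 5);
 *   srpt_set_ioc(c_list, 2, 3);
 *
 * leaves c_list[0] == 0x53: slot 1 occupies the high nibble and slot 2
 * the low nibble of the first byte.
 */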

/*
 * Write InfiniBand ClassPortInfo to mad. See also section 16.3.3.1
 * ClassPortInfo in the InfiniBand Architecture Specification.
 */
static void srpt_get_class_port_info(struct ib_dm_mad *mad)
{
        struct ib_class_port_info *cif;

        cif = (struct ib_class_port_info *)mad->data;
        memset(cif, 0, sizeof *cif);
        cif->base_version = 1;
        cif->class_version = 1;
        cif->resp_time_value = 20;

        mad->mad_hdr.status = 0;
}

/*
 * Write IOUnitInfo to mad. See also section 16.3.3.3 IOUnitInfo in the
 * InfiniBand Architecture Specification. See also section B.7,
 * table B.6 in the T10 SRP r16a document.
 */
static void srpt_get_iou(struct ib_dm_mad *mad)
{
        struct ib_dm_iou_info *ioui;
        u8 slot;
        int i;

        ioui = (struct ib_dm_iou_info *)mad->data;
        ioui->change_id = 1;
        ioui->max_controllers = 16;

        /* set present for slot 1 and empty for the rest */
        srpt_set_ioc(ioui->controller_list, 1, 1);
        for (i = 1, slot = 2; i < 16; i++, slot++)
                srpt_set_ioc(ioui->controller_list, slot, 0);

        mad->mad_hdr.status = 0;
}

/*
 * Write IOControllerProfile to mad for I/O controller (sdev, slot). See also
 * section 16.3.3.4 IOControllerProfile in the InfiniBand Architecture
 * Specification. See also section B.7, table B.7 in the T10 SRP r16a
 * document.
 */
static void srpt_get_ioc(struct srpt_device *sdev, u32 slot,
                         struct ib_dm_mad *mad)
{
        struct ib_dm_ioc_profile *iocp;

        iocp = (struct ib_dm_ioc_profile *)mad->data;

        if (!slot || slot > 16) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
                return;
        }

        if (slot > 2) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
                return;
        }

        memset(iocp, 0, sizeof *iocp);
        strcpy(iocp->id_string, MELLANOX_SRPT_ID_STRING);
        iocp->guid = cpu_to_be64(srpt_service_guid);
        iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
        iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id);
        iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver);
        iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
        iocp->subsys_device_id = 0x0;
        iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
        iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
        iocp->protocol = cpu_to_be16(SRP_PROTOCOL);
        iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION);
        iocp->send_queue_depth = cpu_to_be16(SRPT_SRQ_SIZE);
        iocp->rdma_read_depth = 4;
        iocp->send_size = cpu_to_be32(srp_max_message_size);
        iocp->rdma_size = cpu_to_be32(min(max(srp_max_rdma_size, 256U),
                                          1U << 24));
        iocp->num_svc_entries = 1;
        iocp->op_cap_mask = SRP_SEND_TO_IOC | SRP_SEND_FROM_IOC |
                SRP_RDMA_READ_FROM_IOC | SRP_RDMA_WRITE_FROM_IOC;

        mad->mad_hdr.status = 0;
}

/*
 * Device management: write ServiceEntries to mad for the given slot. See also
 * section 16.3.3.5 ServiceEntries in the InfiniBand Architecture
 * Specification. See also section B.7, table B.8 in the T10 SRP r16a document.
 */
static void srpt_get_svc_entries(u64 ioc_guid,
                                 u16 slot, u8 hi, u8 lo, struct ib_dm_mad *mad)
{
        struct ib_dm_svc_entries *svc_entries;

        WARN_ON(!ioc_guid);

        if (!slot || slot > 16) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
                return;
        }

        if (slot > 2 || lo > hi || hi > 1) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
                return;
        }

        svc_entries = (struct ib_dm_svc_entries *)mad->data;
        memset(svc_entries, 0, sizeof *svc_entries);
        svc_entries->service_entries[0].id = cpu_to_be64(ioc_guid);
        snprintf(svc_entries->service_entries[0].name,
                 sizeof(svc_entries->service_entries[0].name),
                 "%s%016llx",
                 SRP_SERVICE_NAME_PREFIX,
                 (unsigned long long)ioc_guid);

        mad->mad_hdr.status = 0;
}

/*
 * Process a received MAD *rq_mad that arrived through source port *sp
 * (MAD = InfiniBand management datagram). The response to be sent back is
 * written to *rsp_mad.
 */
static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
                                 struct ib_dm_mad *rsp_mad)
{
        u16 attr_id;
        u32 slot;
        u8 hi, lo;

        attr_id = be16_to_cpu(rq_mad->mad_hdr.attr_id);
        switch (attr_id) {
        case DM_ATTR_CLASS_PORT_INFO:
                srpt_get_class_port_info(rsp_mad);
                break;
        case DM_ATTR_IOU_INFO:
                srpt_get_iou(rsp_mad);
                break;
        case DM_ATTR_IOC_PROFILE:
                slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
                srpt_get_ioc(sp->sdev, slot, rsp_mad);
                break;
        case DM_ATTR_SVC_ENTRIES:
                slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
                hi = (u8) ((slot >> 8) & 0xff);
                lo = (u8) (slot & 0xff);
                slot = (u16) ((slot >> 16) & 0xffff);
                srpt_get_svc_entries(srpt_service_guid,
                                     slot, hi, lo, rsp_mad);
                break;
        default:
                rsp_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
                break;
        }
}
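
/*
 * Sketch of the DM_ATTR_SVC_ENTRIES attribute modifier decoding above:
 *
 *   bits 31..16: slot number
 *   bits 15..8:  hi, index of the last requested service entry
 *   bits  7..0:  lo, index of the first requested service entry
 *
 * e.g. attr_mod 0x00010000 requests service entries 0..0 of slot 1.
 */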

/*
 * Callback function that is called by the InfiniBand core after transmission of
 * a MAD. (MAD = management datagram; AH = address handle.)
 */
static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
                                  struct ib_mad_send_wc *mad_wc)
{
        ib_destroy_ah(mad_wc->send_buf->ah);
        ib_free_send_mad(mad_wc->send_buf);
}

/*
 * Callback function that is called by the InfiniBand core after reception of
 * a MAD (management datagram).
 */
static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
                                  struct ib_mad_recv_wc *mad_wc)
{
        struct srpt_port *sport = (struct srpt_port *)mad_agent->context;
        struct ib_ah *ah;
        struct ib_mad_send_buf *rsp;
        struct ib_dm_mad *dm_mad;

        if (!mad_wc || !mad_wc->recv_buf.mad)
                return;

        ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc,
                                  mad_wc->recv_buf.grh, mad_agent->port_num);
        if (IS_ERR(ah))
                goto err;

        BUILD_BUG_ON(offsetof(struct ib_dm_mad, data) != IB_MGMT_DEVICE_HDR);

        rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp,
                                 mad_wc->wc->pkey_index, 0,
                                 IB_MGMT_DEVICE_HDR, IB_MGMT_DEVICE_DATA,
                                 GFP_KERNEL);
        if (IS_ERR(rsp))
                goto err_rsp;

        rsp->ah = ah;

        dm_mad = rsp->mad;
        memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof *dm_mad);
        dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
        dm_mad->mad_hdr.status = 0;

        switch (mad_wc->recv_buf.mad->mad_hdr.method) {
        case IB_MGMT_METHOD_GET:
                srpt_mgmt_method_get(sport, mad_wc->recv_buf.mad, dm_mad);
                break;
        case IB_MGMT_METHOD_SET:
                dm_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
                break;
        default:
                dm_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD);
                break;
        }

        if (!ib_post_send_mad(rsp, NULL)) {
                ib_free_recv_mad(mad_wc);
                /* will destroy_ah & free_send_mad in send completion */
                return;
        }

        ib_free_send_mad(rsp);

err_rsp:
        ib_destroy_ah(ah);
err:
        ib_free_recv_mad(mad_wc);
}

/*
 * Enable InfiniBand management datagram processing, update the cached sm_lid,
 * lid and gid values, and register a callback function for processing MADs
 * on the specified port. It is safe to call this function more than once for
 * the same port.
 */
static int srpt_refresh_port(struct srpt_port *sport)
{
        struct ib_mad_reg_req reg_req;
        struct ib_port_modify port_modify;
        struct ib_port_attr port_attr;
        int ret;

        TRACE_ENTRY();

        memset(&port_modify, 0, sizeof port_modify);
        port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
        port_modify.clr_port_cap_mask = 0;

        ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
        if (ret)
                goto err_mod_port;

        ret = ib_query_port(sport->sdev->device, sport->port, &port_attr);
        if (ret)
                goto err_query_port;

        sport->sm_lid = port_attr.sm_lid;
        sport->lid = port_attr.lid;

        ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid);
        if (ret)
                goto err_query_port;

        if (!sport->mad_agent) {
                memset(&reg_req, 0, sizeof reg_req);
                reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
                reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION;
                set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
                set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);

                sport->mad_agent = ib_register_mad_agent(sport->sdev->device,
                                                         sport->port,
                                                         IB_QPT_GSI,
                                                         &reg_req, 0,
                                                         srpt_mad_send_handler,
                                                         srpt_mad_recv_handler,
                                                         sport);
                if (IS_ERR(sport->mad_agent)) {
                        ret = PTR_ERR(sport->mad_agent);
                        sport->mad_agent = NULL;
                        goto err_query_port;
                }
        }

        TRACE_EXIT_RES(0);

        return 0;

err_query_port:

        port_modify.set_port_cap_mask = 0;
        port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
        ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);

err_mod_port:

        TRACE_EXIT_RES(ret);

        return ret;
}

/*
 * Unregister the callback function for processing MADs and disable MAD
 * processing for all ports of the specified device. It is safe to call this
 * function more than once for the same device.
 */
static void srpt_unregister_mad_agent(struct srpt_device *sdev)
{
        struct ib_port_modify port_modify = {
                .clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP,
        };
        struct srpt_port *sport;
        int i;

        for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
                sport = &sdev->port[i - 1];
                WARN_ON(sport->port != i);
                if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0)
                        PRINT_ERROR("%s", "disabling MAD processing failed.");
                if (sport->mad_agent) {
                        ib_unregister_mad_agent(sport->mad_agent);
                        sport->mad_agent = NULL;
                }
        }
}

/**
 * Allocate and initialize an SRPT I/O context structure.
 */
static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev)
{
        struct srpt_ioctx *ioctx;

        ioctx = kmalloc(sizeof *ioctx, GFP_KERNEL);
        if (!ioctx)
                goto out;

        ioctx->buf = kzalloc(srp_max_message_size, GFP_KERNEL);
        if (!ioctx->buf)
                goto out_free_ioctx;

        ioctx->dma = ib_dma_map_single(sdev->device, ioctx->buf,
                                       srp_max_message_size, DMA_BIDIRECTIONAL);
        if (ib_dma_mapping_error(sdev->device, ioctx->dma))
                goto out_free_buf;

        return ioctx;

out_free_buf:
        kfree(ioctx->buf);
out_free_ioctx:
        kfree(ioctx);
out:
        return NULL;
}

/*
 * Deallocate an SRPT I/O context structure.
 */
static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
{
        if (!ioctx)
                return;

        ib_dma_unmap_single(sdev->device, ioctx->dma,
                            srp_max_message_size, DMA_BIDIRECTIONAL);
        kfree(ioctx->buf);
        kfree(ioctx);
}

/*
 * Associate a ring of SRPT I/O context structures with the specified device.
 */
static int srpt_alloc_ioctx_ring(struct srpt_device *sdev)
{
        int i;

        TRACE_ENTRY();

        for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
                sdev->ioctx_ring[i] = srpt_alloc_ioctx(sdev);

                if (!sdev->ioctx_ring[i])
                        goto err;

                sdev->ioctx_ring[i]->index = i;
        }

        TRACE_EXIT_RES(0);

        return 0;

err:
        while (--i >= 0) {
                srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
                sdev->ioctx_ring[i] = NULL;
        }
        TRACE_EXIT_RES(-ENOMEM);
        return -ENOMEM;
}

/* Free the ring of SRPT I/O context structures. */
static void srpt_free_ioctx_ring(struct srpt_device *sdev)
{
        int i;

        for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
                srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
                sdev->ioctx_ring[i] = NULL;
        }
}

/**
 * Set the state of a command.
 * @new: New state to be set.
 *
 * Does not modify the state of aborted commands. Returns the previous command
 * state.
 */
static enum srpt_command_state srpt_set_cmd_state(struct srpt_ioctx *ioctx,
                                                  enum srpt_command_state new)
{
        enum srpt_command_state previous;

        WARN_ON(!ioctx);
        WARN_ON(new == SRPT_STATE_NEW);

        do {
                previous = atomic_read(&ioctx->state);
        } while (previous != SRPT_STATE_ABORTED
               && atomic_cmpxchg(&ioctx->state, previous, new) != previous);

        return previous;
}
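
/*
 * Example: once a command has reached SRPT_STATE_ABORTED, a subsequent
 * srpt_set_cmd_state(ioctx, SRPT_STATE_PROCESSED) call leaves the state
 * unchanged and returns SRPT_STATE_ABORTED, so the caller can detect
 * that an abort won the race.
 */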

/**
 * Test and set the state of a command.
 * @old: State to compare against.
 * @new: New state to be set if the current state matches 'old'.
 *
 * Returns the previous command state.
 */
static enum srpt_command_state
srpt_test_and_set_cmd_state(struct srpt_ioctx *ioctx,
                            enum srpt_command_state old,
                            enum srpt_command_state new)
{
        WARN_ON(!ioctx);
        WARN_ON(old == SRPT_STATE_ABORTED);
        WARN_ON(new == SRPT_STATE_NEW);

        return atomic_cmpxchg(&ioctx->state, old, new);
}

/**
 * Post a receive request on the work queue of InfiniBand device 'sdev'.
 */
static int srpt_post_recv(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
{
        struct ib_sge list;
        struct ib_recv_wr wr, *bad_wr;

        wr.wr_id = ioctx->index | SRPT_OP_RECV;

        list.addr = ioctx->dma;
        list.length = srp_max_message_size;
        list.lkey = sdev->mr->lkey;

        wr.next = NULL;
        wr.sg_list = &list;
        wr.num_sge = 1;

        return ib_post_srq_recv(sdev->srq, &wr, &bad_wr);
}

/**
 * Post an IB send request.
 * @ch: RDMA channel to post the send request on.
 * @ioctx: I/O context of the send request.
 * @len: length of the request to be sent in bytes.
 *
 * Returns zero upon success and a non-zero value upon failure.
 */
static int srpt_post_send(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
                          int len)
{
        struct ib_sge list;
        struct ib_send_wr wr, *bad_wr;
        struct srpt_device *sdev = ch->sport->sdev;
        int ret;

        ret = -ENOMEM;
        if (atomic_dec_return(&ch->qp_wr_avail) < 0) {
                atomic_inc(&ch->qp_wr_avail);
                PRINT_ERROR("%s[%d]: send queue full", __func__, __LINE__);
                goto out;
        }

        ib_dma_sync_single_for_device(sdev->device, ioctx->dma,
                                      len, DMA_TO_DEVICE);

        list.addr = ioctx->dma;
        list.length = len;
        list.lkey = sdev->mr->lkey;

        wr.next = NULL;
        wr.wr_id = ioctx->index;
        wr.sg_list = &list;
        wr.num_sge = 1;
        wr.opcode = IB_WR_SEND;
        wr.send_flags = IB_SEND_SIGNALED;

        ret = ib_post_send(ch->qp, &wr, &bad_wr);

out:
        return ret;
}

static int srpt_get_desc_tbl(struct srpt_ioctx *ioctx, struct srp_cmd *srp_cmd)
{
        struct srp_indirect_buf *idb;
        struct srp_direct_buf *db;
        unsigned add_cdb_offset;
        int ret;

        /*
         * The pointer computations below will only be compiled correctly
         * if srp_cmd::add_data is declared as s8*, u8*, s8[] or u8[].
         */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 31)
        BUILD_BUG_ON(!__same_type(srp_cmd->add_data[0], (s8)0)
                     && !__same_type(srp_cmd->add_data[0], (u8)0));
#else
        /* Note: the __same_type() macro was introduced in kernel 2.6.31. */
#endif

        ret = 0;
        /*
         * According to the SRP spec, the lower two bits of the 'ADDITIONAL
         * CDB LENGTH' field are reserved and the size in bytes of this field
         * is four times the value specified in bits 3..7. Hence the "& ~3".
         */
        add_cdb_offset = srp_cmd->add_cdb_len & ~3;
        if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) ||
            ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) {
                ioctx->n_rbuf = 1;
                ioctx->rbufs = &ioctx->single_rbuf;

                db = (struct srp_direct_buf *)(srp_cmd->add_data
                                               + add_cdb_offset);
                memcpy(ioctx->rbufs, db, sizeof *db);
                ioctx->data_len = be32_to_cpu(db->len);
        } else {
                idb = (struct srp_indirect_buf *)(srp_cmd->add_data
                                                  + add_cdb_offset);

                ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof *db;

                if (ioctx->n_rbuf >
                    (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
                        PRINT_ERROR("received corrupt SRP_CMD request"
                                    " (%u out + %u in != %u / %zu)",
                                    srp_cmd->data_out_desc_cnt,
                                    srp_cmd->data_in_desc_cnt,
                                    be32_to_cpu(idb->table_desc.len),
                                    sizeof(*db));
                        ioctx->n_rbuf = 0;
                        ret = -EINVAL;
                        goto out;
                }

                if (ioctx->n_rbuf == 1)
                        ioctx->rbufs = &ioctx->single_rbuf;
                else {
                        ioctx->rbufs =
                                kmalloc(ioctx->n_rbuf * sizeof *db, GFP_ATOMIC);
                        if (!ioctx->rbufs) {
                                ioctx->n_rbuf = 0;
                                ret = -ENOMEM;
                                goto out;
                        }
                }

                db = idb->desc_list;
                memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof *db);
                ioctx->data_len = be32_to_cpu(idb->len);
        }
out:
        return ret;
}
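
/*
 * Layout of the additional data parsed by srpt_get_desc_tbl() (sketch;
 * offsets are relative to srp_cmd::add_data):
 *
 *   +----------------------------+ offset 0
 *   | additional CDB             | (add_cdb_len & ~3) bytes
 *   +----------------------------+ offset add_cdb_offset
 *   | data descriptor(s): struct |
 *   | srp_direct_buf or struct   |
 *   | srp_indirect_buf           |
 *   +----------------------------+
 */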

/*
 * Modify the attributes of queue pair 'qp': allow local write, remote read,
 * and remote write. Also transition 'qp' to state IB_QPS_INIT.
 */
static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
{
        struct ib_qp_attr *attr;
        int ret;

        attr = kzalloc(sizeof *attr, GFP_KERNEL);
        if (!attr)
                return -ENOMEM;

        attr->qp_state = IB_QPS_INIT;
        attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
            IB_ACCESS_REMOTE_WRITE;
        attr->port_num = ch->sport->port;
        attr->pkey_index = 0;

        ret = ib_modify_qp(qp, attr,
                           IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PORT |
                           IB_QP_PKEY_INDEX);

        kfree(attr);
        return ret;
}

/**
 * Change the state of a channel to 'ready to receive' (RTR).
 * @ch: channel of the queue pair.
 * @qp: queue pair to change the state of.
 *
 * Returns zero upon success and a negative value upon failure.
 *
 * Note: currently a struct ib_qp_attr takes 136 bytes on a 64-bit system.
 * If this structure ever becomes larger, it might be necessary to allocate
 * it dynamically instead of on the stack.
 */
static int srpt_ch_qp_rtr(struct srpt_rdma_ch *ch, struct ib_qp *qp)
{
        struct ib_qp_attr qp_attr;
        int attr_mask;
        int ret;

        qp_attr.qp_state = IB_QPS_RTR;
        ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask);
        if (ret)
                goto out;

        qp_attr.max_dest_rd_atomic = 4;

        ret = ib_modify_qp(qp, &qp_attr, attr_mask);

out:
        return ret;
}

/**
 * Change the state of a channel to 'ready to send' (RTS).
 * @ch: channel of the queue pair.
 * @qp: queue pair to change the state of.
 *
 * Returns zero upon success and a negative value upon failure.
 *
 * Note: currently a struct ib_qp_attr takes 136 bytes on a 64-bit system.
 * If this structure ever becomes larger, it might be necessary to allocate
 * it dynamically instead of on the stack.
 */
static int srpt_ch_qp_rts(struct srpt_rdma_ch *ch, struct ib_qp *qp)
{
        struct ib_qp_attr qp_attr;
        int attr_mask;
        int ret;

        qp_attr.qp_state = IB_QPS_RTS;
        ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask);
        if (ret)
                goto out;

        qp_attr.max_rd_atomic = 4;

        ret = ib_modify_qp(qp, &qp_attr, attr_mask);

out:
        return ret;
}

static void srpt_reset_ioctx(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
{
        srpt_unmap_sg_to_ib_sge(ch, ioctx);

        if (ioctx->n_rbuf > 1) {
                kfree(ioctx->rbufs);
                ioctx->rbufs = NULL;
        }

        WARN_ON(!ch);
        if (!ch)
                return;

        if (srpt_post_recv(ch->sport->sdev, ioctx))
                /* To do: queue the ioctx back on the free_ioctx queue. */
                PRINT_ERROR("%s", "SRQ post_recv failed - this is serious.");
        else
                atomic_inc(&ch->req_lim_delta);
}

/**
 * Abort a command.
 */
static void srpt_abort_scst_cmd(struct srpt_device *sdev,
                                struct scst_cmd *scmnd)
{
        struct srpt_ioctx *ioctx;
        scst_data_direction dir;
        enum srpt_command_state previous_state;

        TRACE_ENTRY();

        ioctx = scst_cmd_get_tgt_priv(scmnd);
        BUG_ON(!ioctx);

        previous_state = srpt_set_cmd_state(ioctx, SRPT_STATE_ABORTED);
        if (previous_state == SRPT_STATE_ABORTED)
                goto out;

        TRACE_DBG("Aborting cmd with state %d and tag %lld",
                  previous_state, scst_cmd_get_tag(scmnd));

        dir = scst_cmd_get_data_direction(scmnd);
        if (dir != SCST_DATA_NONE && scst_cmd_get_sg(scmnd))
                ib_dma_unmap_sg(sdev->device,
                                scst_cmd_get_sg(scmnd),
                                scst_cmd_get_sg_cnt(scmnd),
                                scst_to_tgt_dma_dir(dir));

        switch (previous_state) {
        case SRPT_STATE_NEW:
                break;
        case SRPT_STATE_NEED_DATA:
                WARN_ON(scst_cmd_get_data_direction(ioctx->scmnd)
                        == SCST_DATA_READ);
                scst_rx_data(scmnd,
                             SCST_RX_STATUS_ERROR,
                             SCST_CONTEXT_THREAD);
                break;
        case SRPT_STATE_DATA_IN:
        case SRPT_STATE_PROCESSED:
                scst_set_delivery_status(scmnd, SCST_CMD_DELIVERY_ABORTED);
                WARN_ON(scmnd->state != SCST_CMD_STATE_XMIT_WAIT);
                scst_tgt_cmd_done(scmnd, scst_estimate_context());
                break;
        default:
                TRACE_DBG("Aborting cmd with state %d", previous_state);
                WARN_ON("ERROR: unexpected command state");
        }

out:
        ;

        TRACE_EXIT();
}

static void srpt_handle_err_comp(struct srpt_rdma_ch *ch, struct ib_wc *wc)
{
        struct srpt_ioctx *ioctx;
        struct srpt_device *sdev = ch->sport->sdev;

        if (wc->wr_id & SRPT_OP_RECV) {
                ioctx = sdev->ioctx_ring[wc->wr_id & ~SRPT_OP_RECV];
                PRINT_ERROR("%s", "This is serious - SRQ is in bad state.");
        } else {
                ioctx = sdev->ioctx_ring[wc->wr_id];

                if (ioctx->scmnd)
                        srpt_abort_scst_cmd(sdev, ioctx->scmnd);
                else
                        srpt_reset_ioctx(ch, ioctx);
        }
}

/** Process an IB send completion notification. */
static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
                                  struct srpt_ioctx *ioctx,
                                  enum scst_exec_context context)
{
        if (ioctx->scmnd) {
                scst_data_direction dir =
                        scst_cmd_get_data_direction(ioctx->scmnd);

                if (dir != SCST_DATA_NONE && scst_cmd_get_sg(ioctx->scmnd))
                        ib_dma_unmap_sg(ch->sport->sdev->device,
                                        scst_cmd_get_sg(ioctx->scmnd),
                                        scst_cmd_get_sg_cnt(ioctx->scmnd),
                                        scst_to_tgt_dma_dir(dir));

                WARN_ON(ioctx->scmnd->state != SCST_CMD_STATE_XMIT_WAIT);
                scst_tgt_cmd_done(ioctx->scmnd, context);
        } else
                srpt_reset_ioctx(ch, ioctx);
}

/** Process an IB RDMA completion notification. */
static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
                                  struct srpt_ioctx *ioctx)
{
        if (!ioctx->scmnd) {
                WARN_ON("ERROR: ioctx->scmnd == NULL");
                srpt_reset_ioctx(ch, ioctx);
                return;
        }

        /*
         * If an RDMA completion notification has been received for a write
         * command, tell SCST that processing can continue by calling
         * scst_rx_data().
         */
        if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
                                SRPT_STATE_DATA_IN) == SRPT_STATE_NEED_DATA) {
                WARN_ON(scst_cmd_get_data_direction(ioctx->scmnd)
                        == SCST_DATA_READ);
                scst_rx_data(ioctx->scmnd, SCST_RX_STATUS_SUCCESS,
                             scst_estimate_context());
        }
}

/**
 * Build an SRP_RSP response.
 * @ch: RDMA channel through which the request has been received.
 * @ioctx: I/O context in which the SRP_RSP response will be built.
 * @s_key: sense key that will be stored in the response.
 * @s_code: value that will be stored in the asc_ascq field of the sense data.
 * @tag: tag of the request for which this response is being generated.
 *
 * Returns the size in bytes of the SRP_RSP response.
 *
 * An SRP_RSP response contains a SCSI status or service response. See also
 * section 6.9 in the T10 SRP r16a document for the format of an SRP_RSP
 * response. See also SPC-2 for more information about sense data.
 */
static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
                              struct srpt_ioctx *ioctx, u8 s_key, u8 s_code,
                              u64 tag)
{
        struct srp_rsp *srp_rsp;
        struct sense_data *sense;
        int limit_delta;
        int sense_data_len;
        int resp_len;

        sense_data_len = (s_key == NO_SENSE) ? 0 : sizeof(*sense);
        resp_len = sizeof(*srp_rsp) + sense_data_len;

        srp_rsp = ioctx->buf;
        memset(srp_rsp, 0, sizeof *srp_rsp);

        limit_delta = atomic_read(&ch->req_lim_delta);
        atomic_sub(limit_delta, &ch->req_lim_delta);

        srp_rsp->opcode = SRP_RSP;
        srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
        srp_rsp->tag = tag;

        if (s_key != NO_SENSE) {
                srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
                srp_rsp->status = SAM_STAT_CHECK_CONDITION;
                srp_rsp->sense_data_len = cpu_to_be32(sense_data_len);

                sense = (struct sense_data *)(srp_rsp + 1);
                sense->err_code = 0x70;
                sense->key = s_key;
                sense->asc_ascq = s_code;
        }

        return resp_len;
}
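
/*
 * Example of a response built by srpt_build_cmd_rsp() for a CHECK
 * CONDITION (illustrative field values only): status
 * SAM_STAT_CHECK_CONDITION, flags SRP_RSP_FLAG_SNSVALID, sense_data_len
 * sizeof(struct sense_data), followed by fixed-format sense data with
 * err_code 0x70 and the given sense key and asc_ascq values.
 */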

/**
 * Build a task management response, which is a specific SRP_RSP response.
 * @ch: RDMA channel through which the request has been received.
 * @ioctx: I/O context in which the SRP_RSP response will be built.
 * @rsp_code: RSP_CODE that will be stored in the response.
 * @tag: tag of the request for which this response is being generated.
 *
 * Returns the size in bytes of the SRP_RSP response.
 *
 * An SRP_RSP response contains a SCSI status or service response. See also
 * section 6.9 in the T10 SRP r16a document for the format of an SRP_RSP
 * response.
 */
static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
                                  struct srpt_ioctx *ioctx, u8 rsp_code,
                                  u64 tag)
{
        struct srp_rsp *srp_rsp;
        int limit_delta;
        int resp_data_len;
        int resp_len;

        resp_data_len = (rsp_code == SRP_TSK_MGMT_SUCCESS) ? 0 : 4;
        resp_len = sizeof(*srp_rsp) + resp_data_len;

        srp_rsp = ioctx->buf;
        memset(srp_rsp, 0, sizeof *srp_rsp);

        limit_delta = atomic_read(&ch->req_lim_delta);
        atomic_sub(limit_delta, &ch->req_lim_delta);

        srp_rsp->opcode = SRP_RSP;
        srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
        srp_rsp->tag = tag;

        if (rsp_code != SRP_TSK_MGMT_SUCCESS) {
                srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID;
                srp_rsp->resp_data_len = cpu_to_be32(resp_data_len);
                srp_rsp->data[3] = rsp_code;
        }

        return resp_len;
}

/*
 * Process SRP_CMD.
 */
static int srpt_handle_cmd(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
{
        struct scst_cmd *scmnd;
        struct srp_cmd *srp_cmd;
        struct srp_rsp *srp_rsp;
        scst_data_direction dir;
        int ret;

        srp_cmd = ioctx->buf;
        srp_rsp = ioctx->buf;

        dir = SCST_DATA_NONE;
        if (srp_cmd->buf_fmt) {
                ret = srpt_get_desc_tbl(ioctx, srp_cmd);
                if (ret) {
                        srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
                                           NO_ADD_SENSE, srp_cmd->tag);
                        srp_rsp->status = SAM_STAT_TASK_SET_FULL;
                        goto err;
                }

                /*
                 * The lower four bits of the buffer format field contain the
                 * DATA-IN buffer descriptor format, and the highest four bits
                 * contain the DATA-OUT buffer descriptor format.
                 */
                if (srp_cmd->buf_fmt & 0xf)
                        /* DATA-IN: transfer data from target to initiator. */
                        dir = SCST_DATA_READ;
                else if (srp_cmd->buf_fmt >> 4)
                        /* DATA-OUT: transfer data from initiator to target. */
                        dir = SCST_DATA_WRITE;
        }

        scmnd = scst_rx_cmd(ch->scst_sess, (u8 *) &srp_cmd->lun,
                            sizeof srp_cmd->lun, srp_cmd->cdb, 16,
                            thread ? SCST_NON_ATOMIC : SCST_ATOMIC);
        if (!scmnd) {
                srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
                                   NO_ADD_SENSE, srp_cmd->tag);
                srp_rsp->status = SAM_STAT_TASK_SET_FULL;
                goto err;
        }

        ioctx->scmnd = scmnd;

        switch (srp_cmd->task_attr) {
        case SRP_CMD_HEAD_OF_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_HEAD_OF_QUEUE;
                break;
        case SRP_CMD_ORDERED_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
                break;
        case SRP_CMD_SIMPLE_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_SIMPLE;
                break;
        case SRP_CMD_ACA:
                scmnd->queue_type = SCST_CMD_QUEUE_ACA;
                break;
        default:
                scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
                break;
        }

        scst_cmd_set_tag(scmnd, srp_cmd->tag);
        scst_cmd_set_tgt_priv(scmnd, ioctx);
        scst_cmd_set_expected(scmnd, dir, ioctx->data_len);
        scst_cmd_init_done(scmnd, scst_estimate_context());

        return 0;

err:
        WARN_ON(srp_rsp->opcode != SRP_RSP);

        return -1;
}

/*
 * Process an SRP_TSK_MGMT request.
 *
 * Returns 0 upon success and -1 upon failure.
 *
 * Each task management function is performed by calling one of the
 * scst_rx_mgmt_fn*() functions. These functions will either report failure
 * or process the task management function asynchronously. The function
 * srpt_tsk_mgmt_done() will be called by the SCST core upon completion of the
 * task management function. When srpt_handle_tsk_mgmt() reports failure
 * (i.e. returns -1) a response will have been built in ioctx->buf. This
 * information unit has to be sent back by the caller.
 *
 * For more information about SRP_TSK_MGMT information units, see also section
 * 6.7 in the T10 SRP r16a document.
 */
static int srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
                                struct srpt_ioctx *ioctx)
{
        struct srp_tsk_mgmt *srp_tsk;
        struct srpt_mgmt_ioctx *mgmt_ioctx;
        int ret;

        srp_tsk = ioctx->buf;

        TRACE_DBG("recv_tsk_mgmt= %d for task_tag= %lld"
                  " using tag= %lld cm_id= %p sess= %p",
                  srp_tsk->tsk_mgmt_func,
                  (unsigned long long) srp_tsk->task_tag,
                  (unsigned long long) srp_tsk->tag,
                  ch->cm_id, ch->scst_sess);

        mgmt_ioctx = kmalloc(sizeof *mgmt_ioctx, GFP_ATOMIC);
        if (!mgmt_ioctx) {
                srpt_build_tskmgmt_rsp(ch, ioctx, SRP_TSK_MGMT_FAILED,
                                       srp_tsk->tag);
                goto err;
        }

        mgmt_ioctx->ioctx = ioctx;
        mgmt_ioctx->ch = ch;
        mgmt_ioctx->tag = srp_tsk->tag;

        switch (srp_tsk->tsk_mgmt_func) {
        case SRP_TSK_ABORT_TASK:
                TRACE_DBG("%s", "Processing SRP_TSK_ABORT_TASK");
                ret = scst_rx_mgmt_fn_tag(ch->scst_sess,
                                          SCST_ABORT_TASK,
                                          srp_tsk->task_tag,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_ABORT_TASK_SET:
                TRACE_DBG("%s", "Processing SRP_TSK_ABORT_TASK_SET");
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_ABORT_TASK_SET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_CLEAR_TASK_SET:
                TRACE_DBG("%s", "Processing SRP_TSK_CLEAR_TASK_SET");
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_CLEAR_TASK_SET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_LUN_RESET:
                TRACE_DBG("%s", "Processing SRP_TSK_LUN_RESET");
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_LUN_RESET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_CLEAR_ACA:
                TRACE_DBG("%s", "Processing SRP_TSK_CLEAR_ACA");
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_CLEAR_ACA,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        default:
                TRACE_DBG("%s", "Unsupported task management function.");
                srpt_build_tskmgmt_rsp(ch, ioctx,
                                       SRP_TSK_MGMT_FUNC_NOT_SUPP,
                                       srp_tsk->tag);
                goto err;
        }

        if (ret) {
                TRACE_DBG("%s", "Processing task management function failed.");
                srpt_build_tskmgmt_rsp(ch, ioctx, SRP_TSK_MGMT_FAILED,
                                       srp_tsk->tag);
                goto err;
        }

        WARN_ON(srp_tsk->opcode == SRP_RSP);

        return 0;

err:
        WARN_ON(srp_tsk->opcode != SRP_RSP);

        kfree(mgmt_ioctx);
        return -1;
}

/**
 * Process a newly received information unit.
 * @ch: RDMA channel through which the information unit has been received.
 * @ioctx: SRPT I/O context associated with the information unit.
 */
static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
                               struct srpt_ioctx *ioctx)
{
        struct srp_cmd *srp_cmd;
        struct srp_rsp *srp_rsp;
        enum rdma_ch_state ch_state;
        int len;

        ch_state = atomic_read(&ch->state);
        if (ch_state == RDMA_CHANNEL_CONNECTING) {
                list_add_tail(&ioctx->wait_list, &ch->cmd_wait_list);
                return;
        } else if (ch_state == RDMA_CHANNEL_DISCONNECTING) {
                srpt_reset_ioctx(ch, ioctx);
                return;
        }

        WARN_ON(ch_state != RDMA_CHANNEL_LIVE);

        ib_dma_sync_single_for_cpu(ch->sport->sdev->device,
                                   ioctx->dma, srp_max_message_size,
                                   DMA_FROM_DEVICE);

        ioctx->data_len = 0;
        ioctx->n_rbuf = 0;
        ioctx->rbufs = NULL;
        ioctx->n_rdma = 0;
        ioctx->n_rdma_ius = 0;
        ioctx->rdma_ius = NULL;
        ioctx->scmnd = NULL;
        ioctx->ch = ch;
        atomic_set(&ioctx->state, SRPT_STATE_NEW);

        srp_cmd = ioctx->buf;
        srp_rsp = ioctx->buf;

        switch (srp_cmd->opcode) {
        case SRP_CMD:
                if (srpt_handle_cmd(ch, ioctx) < 0)
                        goto err;
                break;

        case SRP_TSK_MGMT:
                if (srpt_handle_tsk_mgmt(ch, ioctx) < 0)
                        goto err;
                break;

        case SRP_I_LOGOUT:
        case SRP_AER_REQ:
        default:
                srpt_build_cmd_rsp(ch, ioctx, ILLEGAL_REQUEST, INVALID_CDB,
                                   srp_cmd->tag);
                goto err;
        }

        return;

err:
        WARN_ON(srp_rsp->opcode != SRP_RSP);
        len = (sizeof *srp_rsp) + be32_to_cpu(srp_rsp->sense_data_len)
              + be32_to_cpu(srp_rsp->resp_data_len);
1467
1468         ch_state = atomic_read(&ch->state);
1469         if (ch_state != RDMA_CHANNEL_LIVE) {
1470                 /* Give up if another thread modified the channel state. */
1471                 PRINT_ERROR("%s: channel is in state %d", __func__, ch_state);
1472                 srpt_reset_ioctx(ch, ioctx);
1473         } else if (srpt_post_send(ch, ioctx, len)) {
1474                 PRINT_ERROR("%s: sending SRP_RSP response failed", __func__);
1475                 srpt_reset_ioctx(ch, ioctx);
1476         }
1477 }
1478
1479 /*
1480  * Returns true if the ioctx list is non-empty or if the ib_srpt kernel thread
1481  * should stop.
1482  * @pre thread != 0
1483  */
1484 static inline int srpt_test_ioctx_list(void)
1485 {
1486         int res = (!list_empty(&srpt_thread.thread_ioctx_list) ||
1487                    unlikely(kthread_should_stop()));
1488         return res;
1489 }
1490
1491 /*
1492  * Add 'ioctx' to the tail of the ioctx list and wake up the kernel thread.
1493  *
1494  * @pre thread != 0
1495  */
1496 static inline void srpt_schedule_thread(struct srpt_ioctx *ioctx)
1497 {
1498         unsigned long flags;
1499
1500         spin_lock_irqsave(&srpt_thread.thread_lock, flags);
1501         list_add_tail(&ioctx->comp_list, &srpt_thread.thread_ioctx_list);
1502         spin_unlock_irqrestore(&srpt_thread.thread_lock, flags);
1503         wake_up(&ioctx_list_waitQ);
1504 }
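     /*
      * Illustrative sketch (not part of the driver): the two helpers above
      * form the producer side of a classic producer/consumer pair whose
      * consumer is srpt_ioctx_thread() further down in this file:
      *
      *   IRQ context (producer):            kernel thread (consumer):
      *     spin_lock; list_add_tail();        while (!srpt_test_ioctx_list())
      *     spin_unlock;                               schedule();
      *     wake_up(&ioctx_list_waitQ);        ...pop ioctx and handle it...
      */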
1505
1506 /**
1507  * InfiniBand completion queue callback function.
1508  * @cq: completion queue.
1509  * @ctx: completion queue context, which was passed as the fourth argument of
1510  *       the function ib_create_cq().
1511  */
1512 static void srpt_completion(struct ib_cq *cq, void *ctx)
1513 {
1514         struct srpt_rdma_ch *ch = ctx;
1515         struct srpt_device *sdev = ch->sport->sdev;
1516         struct ib_wc wc;
1517         struct srpt_ioctx *ioctx;
1518
1519         ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
1520         while (ib_poll_cq(ch->cq, 1, &wc) > 0) {
1521                 if (wc.status) {
1522                         PRINT_ERROR("failed %s status= %d",
1523                                wc.wr_id & SRPT_OP_RECV ? "receive" : "send",
1524                                wc.status);
1525                         srpt_handle_err_comp(ch, &wc);
1526                         break;
1527                 }
1528
1529                 if (wc.wr_id & SRPT_OP_RECV) {
1530                         ioctx = sdev->ioctx_ring[wc.wr_id & ~SRPT_OP_RECV];
1531                         if (thread) {
1532                                 ioctx->ch = ch;
1533                                 ioctx->op = IB_WC_RECV;
1534                                 srpt_schedule_thread(ioctx);
1535                         } else
1536                                 srpt_handle_new_iu(ch, ioctx);
1537                         continue;
1538                 } else {
1539                         ioctx = sdev->ioctx_ring[wc.wr_id];
1540                         if (wc.opcode == IB_WC_SEND)
1541                                 atomic_inc(&ch->qp_wr_avail);
1542                         else {
1543                                 WARN_ON(wc.opcode != IB_WC_RDMA_READ);
1544                                 WARN_ON(ioctx->n_rdma <= 0);
1545                                 atomic_add(ioctx->n_rdma,
1546                                            &ch->qp_wr_avail);
1547                         }
1548                 }
1549
1550                 if (thread) {
1551                         ioctx->ch = ch;
1552                         ioctx->op = wc.opcode;
1553                         srpt_schedule_thread(ioctx);
1554                 } else {
1555                         switch (wc.opcode) {
1556                         case IB_WC_SEND:
1557                                 srpt_handle_send_comp(ch, ioctx,
1558                                         scst_estimate_context());
1559                                 break;
1560                         case IB_WC_RDMA_WRITE:
1561                         case IB_WC_RDMA_READ:
1562                                 srpt_handle_rdma_comp(ch, ioctx);
1563                                 break;
1564                         default:
1565                                 break;
1566                         }
1567                 }
1568
1569 #if defined(CONFIG_SCST_DEBUG)
1570                 if (interrupt_processing_delay_in_us <= MAX_UDELAY_MS * 1000)
1571                         udelay(interrupt_processing_delay_in_us);
1572 #endif
1573         }
1574 }
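     /*
      * Note on the wr_id encoding assumed above: receive work requests are
      * presumably posted elsewhere in this driver with the SRPT_OP_RECV flag
      * or'ed into wr_id, e.g.
      *
      *   wr.wr_id = ioctx->index | SRPT_OP_RECV;
      *
      * so that srpt_completion() can distinguish receive completions from
      * send/RDMA completions and map wr_id back to an ioctx_ring entry with
      * "wc.wr_id & ~SRPT_OP_RECV". Also note that the CQ is re-armed with
      * ib_req_notify_cq() before it is drained: any completion that arrives
      * after the re-arm triggers a new callback, so no completion is lost
      * between poll loops.
      */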
1575
1576 /*
1577  * Create a completion queue and a queue pair (QP) on the specified device.
1578  */
1579 static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
1580 {
1581         struct ib_qp_init_attr *qp_init;
1582         struct srpt_device *sdev = ch->sport->sdev;
1583         int cqe;
1584         int ret;
1585
1586         qp_init = kzalloc(sizeof *qp_init, GFP_KERNEL);
1587         if (!qp_init)
1588                 return -ENOMEM;
1589
1590         /* Create a completion queue (CQ). */
1591
1592         cqe = SRPT_RQ_SIZE + SRPT_SQ_SIZE - 1;
1593 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && !defined(RHEL_RELEASE_CODE)
1594         ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe);
1595 #else
1596         ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe, 0);
1597 #endif
1598         if (IS_ERR(ch->cq)) {
1599                 ret = PTR_ERR(ch->cq);
1600                 PRINT_ERROR("failed to create_cq cqe= %d ret= %d", cqe, ret);
1601                 goto out;
1602         }
1603
1604         /* Request completion notification. */
1605
1606         ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
1607
1608         /* Create a queue pair (QP). */
1609
1610         qp_init->qp_context = (void *)ch;
1611         qp_init->event_handler
1612                 = (void(*)(struct ib_event *, void*))srpt_qp_event;
1613         qp_init->send_cq = ch->cq;
1614         qp_init->recv_cq = ch->cq;
1615         qp_init->srq = sdev->srq;
1616         qp_init->sq_sig_type = IB_SIGNAL_REQ_WR;
1617         qp_init->qp_type = IB_QPT_RC;
1618         qp_init->cap.max_send_wr = SRPT_SQ_SIZE;
1619         qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE;
1620
1621         ch->qp = ib_create_qp(sdev->pd, qp_init);
1622         if (IS_ERR(ch->qp)) {
1623                 ret = PTR_ERR(ch->qp);
1624                 ib_destroy_cq(ch->cq);
1625                 PRINT_ERROR("failed to create_qp ret= %d", ret);
1626                 goto out;
1627         }
1628
1629         atomic_set(&ch->qp_wr_avail, qp_init->cap.max_send_wr);
1630
1631         TRACE_DBG("%s: max_cqe= %d max_sge= %d cm_id= %p",
1632                __func__, ch->cq->cqe, qp_init->cap.max_send_sge,
1633                ch->cm_id);
1634
1635         /* Modify the attributes and the state of queue pair ch->qp. */
1636
1637         ret = srpt_init_ch_qp(ch, ch->qp);
1638         if (ret) {
1639                 ib_destroy_qp(ch->qp);
1640                 ib_destroy_cq(ch->cq);
1641                 goto out;
1642         }
1643
1644         atomic_set(&ch->req_lim_delta, SRPT_RQ_SIZE);
1645 out:
1646         kfree(qp_init);
1647         return ret;
1648 }
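     /*
      * Sizing note: one CQ is shared by the receive and the send side of the
      * QP created in srpt_create_ch_ib() above, which presumably explains why
      * the CQ is sized to SRPT_RQ_SIZE + SRPT_SQ_SIZE - 1 entries: roughly
      * the maximum number of work requests that can complete before the CQ
      * is drained.
      */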
1649
1650 /**
1651  * Release the channel corresponding to the specified cm_id.
1652  *
1653  * Note: must be called from inside srpt_cm_handler to avoid a race between
1654  * accessing sdev->spinlock and the call to kfree(sdev) in srpt_remove_one()
1655  * (the caller of srpt_cm_handler holds the cm_id spinlock;
1656  * srpt_remove_one() waits until all SCST sessions for the associated
1657  * IB device have been unregistered and SCST session unregistration involves
1658  * a call to ib_destroy_cm_id(), which locks the cm_id spinlock and hence
1659  * waits until this function has finished).
1660  */
1661 static void srpt_release_channel_by_cmid(struct ib_cm_id *cm_id)
1662 {
1663         struct srpt_device *sdev;
1664         struct srpt_rdma_ch *ch;
1665
1666         sdev = cm_id->context;
1667         BUG_ON(!sdev);
1668         spin_lock_irq(&sdev->spinlock);
1669         list_for_each_entry(ch, &sdev->rch_list, list) {
1670                 if (ch->cm_id == cm_id) {
1671                         list_del(&ch->list);
1672                         atomic_set(&ch->state, RDMA_CHANNEL_DISCONNECTING);
1673                         scst_unregister_session(ch->scst_sess, 0,
1674                                                 srpt_release_channel);
1675                         break;
1676                 }
1677         }
1678         spin_unlock_irq(&sdev->spinlock);
1679 }
1680
1681 /**
1682  * Look up the RDMA channel that corresponds to the specified cm_id.
1683  *
1684  * Return NULL if no matching RDMA channel has been found.
1685  */
1686 static struct srpt_rdma_ch *srpt_find_channel(struct srpt_device *sdev,
1687                                               struct ib_cm_id *cm_id)
1688 {
1689         struct srpt_rdma_ch *ch, *found = NULL;
1690
1691         BUG_ON(!sdev);
1692         spin_lock_irq(&sdev->spinlock);
1693         list_for_each_entry(ch, &sdev->rch_list, list)
1694                 if (ch->cm_id == cm_id) {
1695                         found = ch;
1696                         break;
1697                 }
1698         spin_unlock_irq(&sdev->spinlock);
1699         return found;
1700 }
1701
1702 /**
1703  * Release all resources associated with an RDMA channel.
1704  *
1705  * Notes:
1706  * - The caller must have removed the channel from the channel list before
1707  *   calling this function.
1708  * - Must be called as a callback function via scst_unregister_session(). Never
1709  *   call this function directly because doing so would trigger several race
1710  *   conditions.
1711  */
1712 static void srpt_release_channel(struct scst_session *scst_sess)
1713 {
1714         struct srpt_rdma_ch *ch;
1715
1716         TRACE_ENTRY();
1717
1718         ch = scst_sess_get_tgt_priv(scst_sess);
1719         BUG_ON(!ch);
1720         WARN_ON(srpt_find_channel(ch->sport->sdev, ch->cm_id) == ch);
1721
1722         WARN_ON(atomic_read(&ch->state) != RDMA_CHANNEL_DISCONNECTING);
1723
1724         TRACE_DBG("destroying cm_id %p", ch->cm_id);
1725         BUG_ON(!ch->cm_id);
1726         ib_destroy_cm_id(ch->cm_id);
1727
1728         ib_destroy_qp(ch->qp);
1729         ib_destroy_cq(ch->cq);
1730         kfree(ch);
1731
1732         TRACE_EXIT();
1733 }
1734
1735 /**
1736  * Process the event IB_CM_REQ_RECEIVED.
1737  *
1738  * Ownership of the cm_id is transferred to the SCST session if this function
1739  * returns zero. Otherwise the caller remains the owner of cm_id.
1740  */
1741 static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
1742                             struct ib_cm_req_event_param *param,
1743                             void *private_data)
1744 {
1745         struct srpt_device *sdev = cm_id->context;
1746         struct srp_login_req *req;
1747         struct srp_login_rsp *rsp;
1748         struct srp_login_rej *rej;
1749         struct ib_cm_rep_param *rep_param;
1750         struct srpt_rdma_ch *ch, *tmp_ch;
1751         u32 it_iu_len;
1752         int ret = 0;
1753
1754 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
1755         WARN_ON(!sdev || !private_data);
1756         if (!sdev || !private_data)
1757                 return -EINVAL;
1758 #else
1759         if (WARN_ON(!sdev || !private_data))
1760                 return -EINVAL;
1761 #endif
1762
1763         req = (struct srp_login_req *)private_data;
1764
1765         it_iu_len = be32_to_cpu(req->req_it_iu_len);
1766
1767         PRINT_INFO("Received SRP_LOGIN_REQ with"
1768             " i_port_id 0x%llx:0x%llx, t_port_id 0x%llx:0x%llx and length %d"
1769             " on port %d (guid=0x%llx:0x%llx)",
1770             (unsigned long long)be64_to_cpu(*(u64 *)&req->initiator_port_id[0]),
1771             (unsigned long long)be64_to_cpu(*(u64 *)&req->initiator_port_id[8]),
1772             (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[0]),
1773             (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[8]),
1774             it_iu_len,
1775             param->port,
1776             (unsigned long long)be64_to_cpu(*(u64 *)
1777                                 &sdev->port[param->port - 1].gid.raw[0]),
1778             (unsigned long long)be64_to_cpu(*(u64 *)
1779                                 &sdev->port[param->port - 1].gid.raw[8]));
1780
1781         rsp = kzalloc(sizeof *rsp, GFP_KERNEL);
1782         rej = kzalloc(sizeof *rej, GFP_KERNEL);
1783         rep_param = kzalloc(sizeof *rep_param, GFP_KERNEL);
1784
1785         if (!rsp || !rej || !rep_param) {
1786                 ret = -ENOMEM;
1787                 goto out;
1788         }
1789
1790         if (it_iu_len > srp_max_message_size || it_iu_len < 64) {
1791                 rej->reason =
1792                     cpu_to_be32(SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
1793                 ret = -EINVAL;
1794                 PRINT_ERROR("rejected SRP_LOGIN_REQ because its"
1795                             " length (%d bytes) is out of range (%d .. %d)",
1796                             it_iu_len, 64, srp_max_message_size);
1797                 goto reject;
1798         }
1799
1800         if ((req->req_flags & 0x3) == SRP_MULTICHAN_SINGLE) {
1801                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;
1802
1803                 spin_lock_irq(&sdev->spinlock);
1804
1805                 list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
1806                         if (!memcmp(ch->i_port_id, req->initiator_port_id, 16)
1807                             && !memcmp(ch->t_port_id, req->target_port_id, 16)
1808                             && param->port == ch->sport->port
1809                             && param->listen_id == ch->sport->sdev->cm_id
1810                             && ch->cm_id) {
1811                                 enum rdma_ch_state prev_state;
1812
1813                                 /* found an existing channel */
1814                                 TRACE_DBG("Found existing channel name= %s"
1815                                           " cm_id= %p state= %d",
1816                                           ch->sess_name, ch->cm_id,
1817                                           atomic_read(&ch->state));
1818
1819                                 prev_state = atomic_xchg(&ch->state,
1820                                                 RDMA_CHANNEL_DISCONNECTING);
1821                                 if (prev_state == RDMA_CHANNEL_CONNECTING)
1822                                         list_del(&ch->list);
1823
1824                                 spin_unlock_irq(&sdev->spinlock);
1825
1826                                 rsp->rsp_flags =
1827                                         SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
1828
1829                                 if (prev_state == RDMA_CHANNEL_LIVE) {
1830                                         ib_send_cm_dreq(ch->cm_id, NULL, 0);
1831                                         PRINT_INFO("disconnected"
1832                                           " session %s because a new"
1833                                           " SRP_LOGIN_REQ has been received.",
1834                                           ch->sess_name);
1835                                 } else if (prev_state ==
1836                                          RDMA_CHANNEL_CONNECTING) {
1837                                         PRINT_ERROR("%s", "rejected"
1838                                           " SRP_LOGIN_REQ because another login"
1839                                           " request is being processed.");
1840                                         ib_send_cm_rej(ch->cm_id,
1841                                                        IB_CM_REJ_NO_RESOURCES,
1842                                                        NULL, 0, NULL, 0);
1843                                         scst_unregister_session(ch->scst_sess,
1844                                                         0,
1845                                                         srpt_release_channel);
1846                                 }
1847
1848                                 spin_lock_irq(&sdev->spinlock);
1849                         }
1850                 }
1851
1852                 spin_unlock_irq(&sdev->spinlock);
1853
1854         } else
1855                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
1856
1857         if (((u64) (*(u64 *) req->target_port_id) !=
1858              cpu_to_be64(srpt_service_guid)) ||
1859             ((u64) (*(u64 *) (req->target_port_id + 8)) !=
1860              cpu_to_be64(srpt_service_guid))) {
1861                 rej->reason =
1862                     cpu_to_be32(SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
1863                 ret = -EINVAL;
1864                 PRINT_ERROR("%s", "rejected SRP_LOGIN_REQ because it"
1865                        " has an invalid target port identifier.");
1866                 goto reject;
1867         }
1868
1869         ch = kzalloc(sizeof *ch, GFP_KERNEL);
1870         if (!ch) {
1871                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1872                 PRINT_ERROR("%s",
1873                             "rejected SRP_LOGIN_REQ because the target is out of memory.");
1874                 ret = -ENOMEM;
1875                 goto reject;
1876         }
1877
1878         memcpy(ch->i_port_id, req->initiator_port_id, 16);
1879         memcpy(ch->t_port_id, req->target_port_id, 16);
1880         ch->sport = &sdev->port[param->port - 1];
1881         ch->cm_id = cm_id;
1882         atomic_set(&ch->state, RDMA_CHANNEL_CONNECTING);
1883         INIT_LIST_HEAD(&ch->cmd_wait_list);
1884
1885         ret = srpt_create_ch_ib(ch);
1886         if (ret) {
1887                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1888                 PRINT_ERROR("%s", "rejected SRP_LOGIN_REQ because creating"
1889                             " a new RDMA channel failed.");
1890                 goto free_ch;
1891         }
1892
1893         ret = srpt_ch_qp_rtr(ch, ch->qp);
1894         if (ret) {
1895                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1896                 PRINT_ERROR("rejected SRP_LOGIN_REQ because enabling"
1897                        " RTR failed (error code = %d)", ret);
1898                 goto destroy_ib;
1899         }
1900
1901         if (use_port_guid_in_session_name) {
1902                 /*
1903                  * If the kernel module parameter use_port_guid_in_session_name
1904                  * has been specified, use a combination of the target port
1905                  * GUID and the initiator port ID as the session name. This
1906                  * was the original behavior of the SRP target implementation
1907                  * (i.e. before the SRPT was included in OFED 1.3).
1908                  */
1909                 snprintf(ch->sess_name, sizeof(ch->sess_name),
1910                          "0x%016llx%016llx",
1911                          (unsigned long long)be64_to_cpu(*(u64 *)
1912                                 &sdev->port[param->port - 1].gid.raw[8]),
1913                          (unsigned long long)be64_to_cpu(*(u64 *)
1914                                 (ch->i_port_id + 8)));
1915         } else {
1916                 /*
1917                  * Default behavior: use the initiator port identifier as the
1918                  * session name.
1919                  */
1920                 snprintf(ch->sess_name, sizeof(ch->sess_name),
1921                          "0x%016llx%016llx",
1922                          (unsigned long long)be64_to_cpu(*(u64 *)ch->i_port_id),
1923                          (unsigned long long)be64_to_cpu(*(u64 *)
1924                                  (ch->i_port_id + 8)));
1925         }
1926
1927         TRACE_DBG("registering session %s", ch->sess_name);
1928
1929         BUG_ON(!sdev->scst_tgt);
1930         ch->scst_sess = scst_register_session(sdev->scst_tgt, 0, ch->sess_name,
1931                                               NULL, NULL);
1932         if (!ch->scst_sess) {
1933                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1934                 TRACE_DBG("%s", "Failed to create scst sess");
1935                 goto destroy_ib;
1936         }
1937
1938         TRACE_DBG("Establish connection sess=%p name=%s cm_id=%p",
1939                   ch->scst_sess, ch->sess_name, ch->cm_id);
1940
1941         scst_sess_set_tgt_priv(ch->scst_sess, ch);
1942
1943         /* create srp_login_response */
1944         rsp->opcode = SRP_LOGIN_RSP;
1945         rsp->tag = req->tag;
1946         rsp->max_it_iu_len = req->req_it_iu_len;
1947         rsp->max_ti_iu_len = req->req_it_iu_len;
1948         rsp->buf_fmt =
1949             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
1950         rsp->req_lim_delta = cpu_to_be32(SRPT_RQ_SIZE);
1951         atomic_set(&ch->req_lim_delta, 0);
1952
1953         /* create cm reply */
1954         rep_param->qp_num = ch->qp->qp_num;
1955         rep_param->private_data = (void *)rsp;
1956         rep_param->private_data_len = sizeof *rsp;
1957         rep_param->rnr_retry_count = 7;
1958         rep_param->flow_control = 1;
1959         rep_param->failover_accepted = 0;
1960         rep_param->srq = 1;
1961         rep_param->responder_resources = 4;
1962         rep_param->initiator_depth = 4;
1963
1964         ret = ib_send_cm_rep(cm_id, rep_param);
1965         if (ret) {
1966                 PRINT_ERROR("sending SRP_LOGIN_REQ response failed"
1967                             " (error code = %d)", ret);
1968                 goto release_channel;
1969         }
1970
1971         spin_lock_irq(&sdev->spinlock);
1972         list_add_tail(&ch->list, &sdev->rch_list);
1973         spin_unlock_irq(&sdev->spinlock);
1974
1975         goto out;
1976
1977 release_channel:
1978         atomic_set(&ch->state, RDMA_CHANNEL_DISCONNECTING);
1979         scst_unregister_session(ch->scst_sess, 0, NULL);
1980         ch->scst_sess = NULL;
1981
1982 destroy_ib:
1983         ib_destroy_qp(ch->qp);
1984         ib_destroy_cq(ch->cq);
1985
1986 free_ch:
1987         kfree(ch);
1988
1989 reject:
1990         rej->opcode = SRP_LOGIN_REJ;
1991         rej->tag = req->tag;
1992         rej->buf_fmt =
1993             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
1994
1995         ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
1996                              (void *)rej, sizeof *rej);
1997
1998 out:
1999         kfree(rep_param);
2000         kfree(rsp);
2001         kfree(rej);
2002
2003         return ret;
2004 }
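     /*
      * Error-unwinding summary for srpt_cm_req_recv(): the labels above form
      * a ladder in which each entry point releases what had been set up by
      * the time the error occurred and then falls through to the next label:
      *
      *   release_channel: unregister the SCST session and mark the channel
      *                    as disconnecting;
      *   destroy_ib:      destroy the QP and the CQ created by
      *                    srpt_create_ch_ib();
      *   free_ch:         free the srpt_rdma_ch structure;
      *   reject:          send an SRP_LOGIN_REJ back to the initiator;
      *   out:             free the temporary rsp, rej and rep_param buffers.
      */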
2005
2006 static void srpt_cm_rej_recv(struct ib_cm_id *cm_id)
2007 {
2008         PRINT_INFO("Received InfiniBand REJ packet for cm_id %p.", cm_id);
2009         srpt_release_channel_by_cmid(cm_id);
2010 }
2011
2012 /**
2013  * Process an IB_CM_RTU_RECEIVED or IB_CM_USER_ESTABLISHED event.
2014  *
2015  * An IB_CM_RTU_RECEIVED message indicates that the connection is established
2016  * and that the recipient may begin transmitting (RTU = ready to use).
2017  */
2018 static void srpt_cm_rtu_recv(struct ib_cm_id *cm_id)
2019 {
2020         struct srpt_rdma_ch *ch;
2021         int ret;
2022
2023         ch = srpt_find_channel(cm_id->context, cm_id);
2024         WARN_ON(!ch);
2025         if (!ch)
2026                 goto out;
2027
2028         if (srpt_test_and_set_channel_state(ch, RDMA_CHANNEL_CONNECTING,
2029                         RDMA_CHANNEL_LIVE) == RDMA_CHANNEL_CONNECTING) {
2030                 struct srpt_ioctx *ioctx, *ioctx_tmp;
2031
2032                 ret = srpt_ch_qp_rts(ch, ch->qp);
2033
2034                 list_for_each_entry_safe(ioctx, ioctx_tmp, &ch->cmd_wait_list,
2035                                          wait_list) {
2036                         list_del(&ioctx->wait_list);
2037                         srpt_handle_new_iu(ch, ioctx);
2038                 }
2039                 if (ret && srpt_test_and_set_channel_state(ch,
2040                         RDMA_CHANNEL_LIVE,
2041                         RDMA_CHANNEL_DISCONNECTING) == RDMA_CHANNEL_LIVE) {
2042                         TRACE_DBG("cm_id=%p sess_name=%s state=%d",
2043                                   cm_id, ch->sess_name,
2044                                   atomic_read(&ch->state));
2045                         ib_send_cm_dreq(ch->cm_id, NULL, 0);
2046                 }
2047         }
2048
2049 out:
2050         ;
2051 }
2052
2053 static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
2054 {
2055         PRINT_INFO("Received InfiniBand TimeWait exit for cm_id %p.", cm_id);
2056         srpt_release_channel_by_cmid(cm_id);
2057 }
2058
2059 static void srpt_cm_rep_error(struct ib_cm_id *cm_id)
2060 {
2061         PRINT_INFO("Received InfiniBand REP error for cm_id %p.", cm_id);
2062         srpt_release_channel_by_cmid(cm_id);
2063 }
2064
2065 static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
2066 {
2067         struct srpt_rdma_ch *ch;
2068
2069         ch = srpt_find_channel(cm_id->context, cm_id);
2070         WARN_ON(!ch);
2071         if (!ch)
2072                 goto out;
2073
2074         TRACE_DBG("cm_id= %p ch->state= %d", cm_id, atomic_read(&ch->state));
2075
2076         switch (atomic_read(&ch->state)) {
2077         case RDMA_CHANNEL_LIVE:
2078         case RDMA_CHANNEL_CONNECTING:
2079                 ib_send_cm_drep(ch->cm_id, NULL, 0);
2080                 PRINT_INFO("Received DREQ and sent DREP for session %s.",
2081                            ch->sess_name);
2082                 break;
2083         case RDMA_CHANNEL_DISCONNECTING:
2084         default:
2085                 break;
2086         }
2087
2088 out:
2089         ;
2090 }
2091
2092 static void srpt_cm_drep_recv(struct ib_cm_id *cm_id)
2093 {
2094         PRINT_INFO("Received InfiniBand DREP message for cm_id %p.", cm_id);
2095         srpt_release_channel_by_cmid(cm_id);
2096 }
2097
2098 /**
2099  * IB connection manager callback function.
2100  *
2101  * A non-zero return value will cause the caller to destroy the CM ID.
2102  *
2103  * Note: srpt_cm_handler() must only return a non-zero value when
2104  * srpt_cm_req_recv() fails to transfer ownership of the cm_id. Returning
2105  * a non-zero value in any other case will trigger a race with the
2106  * ib_destroy_cm_id() call in srpt_release_channel().
2107  */
2108 static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2109 {
2110         int ret;
2111
2112         ret = 0;
2113         switch (event->event) {
2114         case IB_CM_REQ_RECEIVED:
2115                 ret = srpt_cm_req_recv(cm_id, &event->param.req_rcvd,
2116                                        event->private_data);
2117                 break;
2118         case IB_CM_REJ_RECEIVED:
2119                 srpt_cm_rej_recv(cm_id);
2120                 break;
2121         case IB_CM_RTU_RECEIVED:
2122         case IB_CM_USER_ESTABLISHED:
2123                 srpt_cm_rtu_recv(cm_id);
2124                 break;
2125         case IB_CM_DREQ_RECEIVED:
2126                 srpt_cm_dreq_recv(cm_id);
2127                 break;
2128         case IB_CM_DREP_RECEIVED:
2129                 srpt_cm_drep_recv(cm_id);
2130                 break;
2131         case IB_CM_TIMEWAIT_EXIT:
2132                 srpt_cm_timewait_exit(cm_id);
2133                 break;
2134         case IB_CM_REP_ERROR:
2135                 srpt_cm_rep_error(cm_id);
2136                 break;
2137         default:
2138                 break;
2139         }
2140
2141         return ret;
2142 }
2143
2144 static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
2145                                  struct srpt_ioctx *ioctx,
2146                                  struct scst_cmd *scmnd)
2147 {
2148         struct scatterlist *scat;
2149         scst_data_direction dir;
2150         struct rdma_iu *riu;
2151         struct srp_direct_buf *db;
2152         dma_addr_t dma_addr;
2153         struct ib_sge *sge;
2154         u64 raddr;
2155         u32 rsize;
2156         u32 tsize;
2157         u32 dma_len;
2158         int count, nrdma;
2159         int i, j, k;
2160
2161         scat = scst_cmd_get_sg(scmnd);
2162         dir = scst_cmd_get_data_direction(scmnd);
2163         WARN_ON(scat == NULL);
2164         count = ib_dma_map_sg(ch->sport->sdev->device, scat,
2165                               scst_cmd_get_sg_cnt(scmnd),
2166                               scst_to_tgt_dma_dir(dir));
2167         if (unlikely(!count))
2168                 return -EBUSY;
2169
2170         if (ioctx->rdma_ius && ioctx->n_rdma_ius)
2171                 nrdma = ioctx->n_rdma_ius;
2172         else {
2173                 nrdma = (count + SRPT_DEF_SG_PER_WQE - 1) / SRPT_DEF_SG_PER_WQE + ioctx->n_rbuf;
2174
2175                 ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu,
2176                                           scst_cmd_atomic(scmnd)
2177                                           ? GFP_ATOMIC : GFP_KERNEL);
2178                 if (!ioctx->rdma_ius) {
2179                         WARN_ON(scat == NULL);
2180                         ib_dma_unmap_sg(ch->sport->sdev->device,
2181                                         scat, scst_cmd_get_sg_cnt(scmnd),
2182                                         scst_to_tgt_dma_dir(dir));
2183                         return -ENOMEM;
2184                 }
2185
2186                 ioctx->n_rdma_ius = nrdma;
2187         }
2188
2189         db = ioctx->rbufs;
2190         tsize = (dir == SCST_DATA_READ) ?
2191                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
2192         dma_len = sg_dma_len(&scat[0]);
2193         riu = ioctx->rdma_ius;
2194
2195         /*
2196          * Compute the number of ib_sge entries needed for each remote
2197          * descriptor. If an RDMA operation needs at most
2198          * SRPT_DEF_SG_PER_WQE ib_sge entries, a single rdma_iu (and hence
2199          * a single RDMA work request) per remote descriptor suffices;
2200          * otherwise extra rdma_iu's are allocated to carry the remaining
2201          * ib_sge entries in additional RDMA work requests.
2202          */
2203         for (i = 0, j = 0;
2204              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
2205                 rsize = be32_to_cpu(db->len);
2206                 raddr = be64_to_cpu(db->va);
2207                 riu->raddr = raddr;
2208                 riu->rkey = be32_to_cpu(db->key);
2209                 riu->sge_cnt = 0;
2210
2211                 /* calculate how many sge required for this remote_buf */
2212                 while (rsize > 0 && tsize > 0) {
2213
2214                         if (rsize >= dma_len) {
2215                                 tsize -= dma_len;
2216                                 rsize -= dma_len;
2217                                 raddr += dma_len;
2218
2219                                 if (tsize > 0) {
2220                                         ++j;
2221                                         if (j < count)
2222                                                 dma_len = sg_dma_len(&scat[j]);
2223                                 }
2224                         } else {
2225                                 tsize -= rsize;
2226                                 dma_len -= rsize;
2227                                 rsize = 0;
2228                         }
2229
2230                         ++riu->sge_cnt;
2231
2232                         if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) {
2233                                 ++ioctx->n_rdma;
2234                                 riu->sge =
2235                                     kmalloc(riu->sge_cnt * sizeof *riu->sge,
2236                                             scst_cmd_atomic(scmnd)
2237                                             ? GFP_ATOMIC : GFP_KERNEL);
2238                                 if (!riu->sge)
2239                                         goto free_mem;
2240
2241                                 ++riu;
2242                                 riu->sge_cnt = 0;
2243                                 riu->raddr = raddr;
2244                                 riu->rkey = be32_to_cpu(db->key);
2245                         }
2246                 }
2247
2248                 ++ioctx->n_rdma;
2249                 riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge,
2250                                    scst_cmd_atomic(scmnd)
2251                                    ? GFP_ATOMIC : GFP_KERNEL);
2252                 if (!riu->sge)
2253                         goto free_mem;
2254         }
2255
2256         db = ioctx->rbufs;
2257         scat = scst_cmd_get_sg(scmnd);
2258         tsize = (dir == SCST_DATA_READ) ?
2259                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
2260         riu = ioctx->rdma_ius;
2261         dma_len = sg_dma_len(&scat[0]);
2262         dma_addr = sg_dma_address(&scat[0]);
2263
2264         /* The second loop maps the SG-list DMA addresses onto rdma_iu->sge. */
2265         for (i = 0, j = 0;
2266              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
2267                 rsize = be32_to_cpu(db->len);
2268                 sge = riu->sge;
2269                 k = 0;
2270
2271                 while (rsize > 0 && tsize > 0) {
2272                         sge->addr = dma_addr;
2273                         sge->lkey = ch->sport->sdev->mr->lkey;
2274
2275                         if (rsize >= dma_len) {
2276                                 sge->length =
2277                                         (tsize < dma_len) ? tsize : dma_len;
2278                                 tsize -= dma_len;
2279                                 rsize -= dma_len;
2280
2281                                 if (tsize > 0) {
2282                                         ++j;
2283                                         if (j < count) {
2284                                                 dma_len = sg_dma_len(&scat[j]);
2285                                                 dma_addr =
2286                                                     sg_dma_address(&scat[j]);
2287                                         }
2288                                 }
2289                         } else {
2290                                 sge->length = (tsize < rsize) ? tsize : rsize;
2291                                 tsize -= rsize;
2292                                 dma_len -= rsize;
2293                                 dma_addr += rsize;
2294                                 rsize = 0;
2295                         }
2296
2297                         ++k;
2298                         if (k == riu->sge_cnt && rsize > 0) {
2299                                 ++riu;
2300                                 sge = riu->sge;
2301                                 k = 0;
2302                         } else if (rsize > 0)
2303                                 ++sge;
2304                 }
2305         }
2306
2307         return 0;
2308
2309 free_mem:
2310         srpt_unmap_sg_to_ib_sge(ch, ioctx);
2311
2312         return -ENOMEM;
2313 }
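     /*
      * Worked example for srpt_map_sg_to_ib_sge() (all numbers made up):
      * suppose ib_dma_map_sg() yields count = 3 segments of 4 KiB each and
      * the initiator supplied two remote buffers of 8 KiB and 4 KiB
      * (n_rbuf = 2, tsize = 12 KiB). The first pass computes rdma_iu 0 =
      * {remote buffer 0, two 4 KiB sges} and rdma_iu 1 = {remote buffer 1,
      * one 4 KiB sge}, i.e. n_rdma = 2 work requests. The second pass then
      * fills in the ib_sge addr/length/lkey triples from the mapped
      * scatterlist.
      */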
2314
2315 static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
2316                                     struct srpt_ioctx *ioctx)
2317 {
2318         struct scst_cmd *scmnd;
2319         struct scatterlist *scat;
2320         scst_data_direction dir;
2321
2322         BUG_ON(ioctx->n_rdma && !ioctx->rdma_ius);
2323
2324         while (ioctx->n_rdma)
2325                 kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge);
2326
2327         kfree(ioctx->rdma_ius);
2328         ioctx->rdma_ius = NULL;
2329
2330         scmnd = ioctx->scmnd;
2331         if (scmnd) {
2332                 BUG_ON(ioctx != scst_cmd_get_tgt_priv(scmnd));
2333                 scat = scst_cmd_get_sg(scmnd);
2334                 if (scat) {
2335                         dir = scst_cmd_get_data_direction(scmnd);
2336                         ib_dma_unmap_sg(ch->sport->sdev->device,
2337                                         scat, scst_cmd_get_sg_cnt(scmnd),
2338                                         scst_to_tgt_dma_dir(dir));
2339                 }
2340         }
2341 }
2342
2343 static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
2344                               scst_data_direction dir)
2345 {
2346         struct ib_send_wr wr;
2347         struct ib_send_wr *bad_wr;
2348         struct rdma_iu *riu;
2349         int i;
2350         int ret;
2351         int sq_wr_avail;
2352
2353         if (dir == SCST_DATA_WRITE) {
2354                 ret = -ENOMEM;
2355                 sq_wr_avail = atomic_sub_return(ioctx->n_rdma,
2356                                                 &ch->qp_wr_avail);
2357                 if (sq_wr_avail < 0) {
2358                         atomic_add(ioctx->n_rdma, &ch->qp_wr_avail);
2359                         PRINT_INFO("%s[%d]: IB send queue full", __func__, __LINE__);
2360                         goto out;
2361                 }
2362         }
2363
2364         ret = 0;
2365         riu = ioctx->rdma_ius;
2366         memset(&wr, 0, sizeof wr);
2367
2368         for (i = 0; i < ioctx->n_rdma; ++i, ++riu) {
2369                 wr.opcode = (dir == SCST_DATA_READ) ?
2370                     IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
2371                 wr.next = NULL;
2372                 wr.wr_id = ioctx->index;
2373                 wr.wr.rdma.remote_addr = riu->raddr;
2374                 wr.wr.rdma.rkey = riu->rkey;
2375                 wr.num_sge = riu->sge_cnt;
2376                 wr.sg_list = riu->sge;
2377
2378                 /* only get completion event for the last rdma wr */
2379                 if (i == (ioctx->n_rdma - 1) && dir == SCST_DATA_WRITE)
2380                         wr.send_flags = IB_SEND_SIGNALED;
2381
2382                 ret = ib_post_send(ch->qp, &wr, &bad_wr);
2383                 if (ret)
2384                         goto out;
2385         }
2386
2387 out:
2388         return ret;
2389 }
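     /*
      * Design note: for SCST_DATA_WRITE only the last RDMA read work request
      * is posted with IB_SEND_SIGNALED, so each data transfer generates a
      * single completion. srpt_completion() compensates by crediting all
      * ioctx->n_rdma work-request slots back to ch->qp_wr_avail when that
      * one IB_WC_RDMA_READ completion arrives.
      */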
2390
2391 /*
2392  * Start data transfer between initiator and target. Must not block.
2393  */
2394 static int srpt_xfer_data(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
2395                           struct scst_cmd *scmnd)
2396 {
2397         int ret;
2398
2399         ret = srpt_map_sg_to_ib_sge(ch, ioctx, scmnd);
2400         if (ret) {
2401                 PRINT_ERROR("%s[%d] ret=%d", __func__, __LINE__, ret);
2402                 ret = SCST_TGT_RES_QUEUE_FULL;
2403                 goto out;
2404         }
2405
2406         ret = srpt_perform_rdmas(ch, ioctx, scst_cmd_get_data_direction(scmnd));
2407         if (ret) {
2408                 if (ret == -EAGAIN || ret == -ENOMEM) {
2409                         PRINT_INFO("%s[%d] queue full -- ret=%d",
2410                                    __func__, __LINE__, ret);
2411                         ret = SCST_TGT_RES_QUEUE_FULL;
2412                 } else {
2413                         PRINT_ERROR("%s[%d] fatal error -- ret=%d",
2414                                     __func__, __LINE__, ret);
2415                         ret = SCST_TGT_RES_FATAL_ERROR;
2416                 }
2417                 goto out_unmap;
2418         }
2419
2420         ret = SCST_TGT_RES_SUCCESS;
2421
2422 out:
2423         return ret;
2424 out_unmap:
2425         srpt_unmap_sg_to_ib_sge(ch, ioctx);
2426         goto out;
2427 }
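     /*
      * Note on the return values above: SCST_TGT_RES_QUEUE_FULL tells the
      * SCST core that the shortage is transient and that the command should
      * be retried later, while SCST_TGT_RES_FATAL_ERROR makes the core fail
      * the command. Only -EAGAIN and -ENOMEM from srpt_perform_rdmas() are
      * treated as transient.
      */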
2428
2429 /*
2430  * Called by the SCST core to inform ib_srpt that data reception from the
2431  * initiator should start (SCST_DATA_WRITE). Must not block.
2432  */
2433 static int srpt_rdy_to_xfer(struct scst_cmd *scmnd)
2434 {
2435         struct srpt_rdma_ch *ch;
2436         struct srpt_ioctx *ioctx;
2437         enum rdma_ch_state ch_state;
2438         int ret;
2439
2440         ioctx = scst_cmd_get_tgt_priv(scmnd);
2441         BUG_ON(!ioctx);
2442
2443         WARN_ON(srpt_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA)
2444                 == SRPT_STATE_ABORTED);
2445
2446         ch = ioctx->ch;
2447         WARN_ON(ch != scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd)));
2448         BUG_ON(!ch);
2449
2450         ch_state = atomic_read(&ch->state);
2451         if (ch_state == RDMA_CHANNEL_DISCONNECTING) {
2452                 TRACE_DBG("cmd with tag %lld: channel disconnecting",
2453                           scst_cmd_get_tag(scmnd));
2454                 ret = SCST_TGT_RES_FATAL_ERROR;
2455                 goto out;
2456         } else if (ch_state == RDMA_CHANNEL_CONNECTING) {
2457                 ret = SCST_TGT_RES_QUEUE_FULL;
2458                 goto out;
2459         }
2460         ret = srpt_xfer_data(ch, ioctx, scmnd);
2461
2462 out:
2463         return ret;
2464 }
2465
2466 /*
2467  * Called by the SCST core. Transmits the response buffer and status held in
2468  * 'scmnd'. Must not block.
2469  */
2470 static int srpt_xmit_response(struct scst_cmd *scmnd)
2471 {
2472         struct srpt_rdma_ch *ch;
2473         struct srpt_ioctx *ioctx;
2474         struct srp_rsp *srp_rsp;
2475         u64 tag;
2476         int ret = SCST_TGT_RES_SUCCESS;
2477         int dir;
2478         int status;
2479
2480         ioctx = scst_cmd_get_tgt_priv(scmnd);
2481         BUG_ON(!ioctx);
2482
2483         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2484         BUG_ON(!ch);
2485
2486         if (unlikely(scst_cmd_aborted(scmnd))) {
2487                 TRACE_DBG("cmd with tag %lld has been aborted",
2488                           scst_cmd_get_tag(scmnd));
2489                 srpt_abort_scst_cmd(ch->sport->sdev, scmnd);
2490                 ret = SCST_TGT_RES_SUCCESS;
2491                 goto out;
2492         }
2493
2494         if (srpt_set_cmd_state(ioctx, SRPT_STATE_PROCESSED)
2495             == SRPT_STATE_ABORTED) {
2496                 ret = SCST_TGT_RES_SUCCESS;
2497                 goto out;
2498         }
2499
2500         tag = scst_cmd_get_tag(scmnd);
2501         dir = scst_cmd_get_data_direction(scmnd);
2502         status = scst_cmd_get_status(scmnd) & 0xff;
2503
2504         srpt_build_cmd_rsp(ch, ioctx, NO_SENSE, NO_ADD_SENSE, tag);
2505
2506         srp_rsp = ioctx->buf;
2507
2508         if (SCST_SENSE_VALID(scst_cmd_get_sense_buffer(scmnd))) {
2509                 unsigned int max_sense_len;
2510
2511                 srp_rsp->sense_data_len = scst_cmd_get_sense_buffer_len(scmnd);
2512                 BUILD_BUG_ON(MIN_MAX_MESSAGE_SIZE <= sizeof(*srp_rsp));
2513                 WARN_ON(srp_max_message_size <= sizeof(*srp_rsp));
2514                 max_sense_len = srp_max_message_size - sizeof(*srp_rsp);
2515                 if (srp_rsp->sense_data_len > max_sense_len) {
2516                         PRINT_WARNING("truncated sense data from %d to %d"
2517                                 " bytes", srp_rsp->sense_data_len,
2518                                 max_sense_len);
2519                         srp_rsp->sense_data_len = max_sense_len;
2520                 }
2521
2522                 memcpy((u8 *) (srp_rsp + 1), scst_cmd_get_sense_buffer(scmnd),
2523                        srp_rsp->sense_data_len);
2524
2525                 srp_rsp->sense_data_len = cpu_to_be32(srp_rsp->sense_data_len);
2526                 srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
2527
2528                 if (!status)
2529                         status = SAM_STAT_CHECK_CONDITION;
2530         }
2531
2532         srp_rsp->status = status;
2533
2534         /* For read commands, transfer the data to the initiator. */
2535         if (dir == SCST_DATA_READ && scst_cmd_get_resp_data_len(scmnd)) {
2536                 ret = srpt_xfer_data(ch, ioctx, scmnd);
2537                 if (ret != SCST_TGT_RES_SUCCESS) {
2538                         PRINT_ERROR("%s: tag= %lld xfer_data failed",
2539                                     __func__, (unsigned long long)tag);
2540                         goto out;
2541                 }
2542         }
2543
2544         if (srpt_post_send(ch, ioctx,
2545                            sizeof *srp_rsp +
2546                            be32_to_cpu(srp_rsp->sense_data_len))) {
2547                 PRINT_ERROR("%s[%d]: ch->state= %d tag= %lld",
2548                             __func__, __LINE__, atomic_read(&ch->state),
2549                             (unsigned long long)tag);
2550                 ret = SCST_TGT_RES_FATAL_ERROR;
2551         }
2552
2553 out:
2554         return ret;
2555 }
2556
2557 /*
2558  * Called by the SCST core to inform ib_srpt that a received task management
2559  * function has been completed. Must not block.
2560  */
2561 static void srpt_tsk_mgmt_done(struct scst_mgmt_cmd *mcmnd)
2562 {
2563         struct srpt_rdma_ch *ch;
2564         struct srpt_mgmt_ioctx *mgmt_ioctx;
2565         struct srpt_ioctx *ioctx;
2566         int rsp_len;
2567
2568         mgmt_ioctx = scst_mgmt_cmd_get_tgt_priv(mcmnd);
2569         BUG_ON(!mgmt_ioctx);
2570
2571         ch = mgmt_ioctx->ch;
2572         BUG_ON(!ch);
2573
2574         ioctx = mgmt_ioctx->ioctx;
2575         BUG_ON(!ioctx);
2576
2577         TRACE_DBG("%s: tsk_mgmt_done for tag= %lld status=%d",
2578                   __func__, (unsigned long long)mgmt_ioctx->tag,
2579                   scst_mgmt_cmd_get_status(mcmnd));
2580
2581         if (srpt_set_cmd_state(ioctx, SRPT_STATE_PROCESSED)
2582             == SRPT_STATE_ABORTED)
2583                 goto out;
2584
2585         rsp_len = srpt_build_tskmgmt_rsp(ch, ioctx,
2586                                          (scst_mgmt_cmd_get_status(mcmnd) ==
2587                                           SCST_MGMT_STATUS_SUCCESS) ?
2588                                          SRP_TSK_MGMT_SUCCESS :
2589                                          SRP_TSK_MGMT_FAILED,
2590                                          mgmt_ioctx->tag);
2591         srpt_post_send(ch, ioctx, rsp_len);
2592
2593         scst_mgmt_cmd_set_tgt_priv(mcmnd, NULL);
2594
2595         kfree(mgmt_ioctx);
2596
2597 out:
2598         ;
2599 }
2600
2601 /*
2602  * Called by the SCST core to inform ib_srpt that the command 'scmnd' is about
2603  * to be freed. May be called in IRQ context.
2604  */
2605 static void srpt_on_free_cmd(struct scst_cmd *scmnd)
2606 {
2607         struct srpt_rdma_ch *ch;
2608         struct srpt_ioctx *ioctx;
2609
2610         ioctx = scst_cmd_get_tgt_priv(scmnd);
2611         BUG_ON(!ioctx);
2612
2613         ch = ioctx->ch;
2614         BUG_ON(!ch);
2615
2616         scst_cmd_set_tgt_priv(scmnd, NULL);
2617         srpt_set_cmd_state(ioctx, SRPT_STATE_ABORTED);
2618         ioctx->scmnd = NULL;
2619         ioctx->ch = NULL;
2620         srpt_reset_ioctx(ch, ioctx);
2621 }
2622
2623 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && !defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2624 /* A vanilla 2.6.19 or older kernel without backported OFED kernel headers. */
2625 static void srpt_refresh_port_work(void *ctx)
2626 #else
2627 static void srpt_refresh_port_work(struct work_struct *work)
2628 #endif
2629 {
2630 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && !defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2631         struct srpt_port *sport = (struct srpt_port *)ctx;
2632 #else
2633         struct srpt_port *sport = container_of(work, struct srpt_port, work);
2634 #endif
2635
2636         srpt_refresh_port(sport);
2637 }
2638
2639 /*
2640  * Called by the SCST core to detect target adapters. Returns the number of
2641  * detected target adapters.
2642  */
2643 static int srpt_detect(struct scst_tgt_template *tp)
2644 {
2645         int device_count;
2646
2647         TRACE_ENTRY();
2648
2649         device_count = atomic_read(&srpt_device_count);
2650
2651         TRACE_EXIT_RES(device_count);
2652
2653         return device_count;
2654 }
2655
2656 /*
2657  * Callback function called by the SCST core from scst_unregister() to free up
2658  * the resources associated with device scst_tgt.
2659  */
2660 static int srpt_release(struct scst_tgt *scst_tgt)
2661 {
2662         struct srpt_device *sdev = scst_tgt_get_tgt_priv(scst_tgt);
2663         struct srpt_rdma_ch *ch, *tmp_ch;
2664
2665         TRACE_ENTRY();
2666
2667         BUG_ON(!scst_tgt);
2668 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
2669         WARN_ON(!sdev);
2670         if (!sdev)
2671                 return -ENODEV;
2672 #else
2673         if (WARN_ON(!sdev))
2674                 return -ENODEV;
2675 #endif
2676
2677 #ifdef CONFIG_SCST_PROC
2678         srpt_unregister_procfs_entry(scst_tgt->tgtt);
2679 #endif /*CONFIG_SCST_PROC*/
2680
2681         spin_lock_irq(&sdev->spinlock);
2682         list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
2683                 list_del(&ch->list);
2684                 atomic_set(&ch->state, RDMA_CHANNEL_DISCONNECTING);
2685                 spin_unlock_irq(&sdev->spinlock);
2686                 ib_send_cm_dreq(ch->cm_id, NULL, 0);
2687                 scst_unregister_session(ch->scst_sess, true,
2688                                         srpt_release_channel);
2689                 spin_lock_irq(&sdev->spinlock);
2690         }
2691         spin_unlock_irq(&sdev->spinlock);
2692
2693         srpt_unregister_mad_agent(sdev);
2694
2695         scst_tgt_set_tgt_priv(scst_tgt, NULL);
2696
2697         TRACE_EXIT();
2698
2699         return 0;
2700 }
2701
2702 /*
2703  * Entry point for ib_srpt's kernel thread. This kernel thread is only created
2704  * when the module parameter 'thread' is not zero (the default is zero).
2705  * This thread processes the ioctx list srpt_thread.thread_ioctx_list.
2706  *
2707  * @pre thread != 0
2708  */
2709 static int srpt_ioctx_thread(void *arg)
2710 {
2711         struct srpt_ioctx *ioctx;
2712
2713         /* Hibernation / freezing of the SRPT kernel thread is not supported. */
2714         current->flags |= PF_NOFREEZE;
2715
2716         spin_lock_irq(&srpt_thread.thread_lock);
2717         while (!kthread_should_stop()) {
2718                 wait_queue_t wait;
2719                 init_waitqueue_entry(&wait, current);
2720
2721                 if (!srpt_test_ioctx_list()) {
2722                         add_wait_queue_exclusive(&ioctx_list_waitQ, &wait);
2723
2724                         for (;;) {
2725                                 set_current_state(TASK_INTERRUPTIBLE);
2726                                 if (srpt_test_ioctx_list())
2727                                         break;
2728                                 spin_unlock_irq(&srpt_thread.thread_lock);
2729                                 schedule();
2730                                 spin_lock_irq(&srpt_thread.thread_lock);
2731                         }
2732                         set_current_state(TASK_RUNNING);
2733                         remove_wait_queue(&ioctx_list_waitQ, &wait);
2734                 }
2735
2736                 while (!list_empty(&srpt_thread.thread_ioctx_list)) {
2737                         ioctx = list_entry(srpt_thread.thread_ioctx_list.next,
2738                                            struct srpt_ioctx, comp_list);
2739
2740                         list_del(&ioctx->comp_list);
2741
2742                         spin_unlock_irq(&srpt_thread.thread_lock);
2743                         switch (ioctx->op) {
2744                         case IB_WC_SEND:
2745                                 srpt_handle_send_comp(ioctx->ch, ioctx,
2746                                         SCST_CONTEXT_DIRECT);
2747                                 break;
2748                         case IB_WC_RDMA_WRITE:
2749                         case IB_WC_RDMA_READ:
2750                                 srpt_handle_rdma_comp(ioctx->ch, ioctx);
2751                                 break;
2752                         case IB_WC_RECV:
2753                                 srpt_handle_new_iu(ioctx->ch, ioctx);
2754                                 break;
2755                         default:
2756                                 break;
2757                         }
2758 #if defined(CONFIG_SCST_DEBUG)
2759                         if (thread_processing_delay_in_us
2760                             <= MAX_UDELAY_MS * 1000)
2761                                 udelay(thread_processing_delay_in_us);
2762 #endif
2763                         spin_lock_irq(&srpt_thread.thread_lock);
2764                 }
2765         }
2766         spin_unlock_irq(&srpt_thread.thread_lock);
2767
2768         return 0;
2769 }
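     /*
      * The thread is presumably created during module initialization (not
      * shown in this section) along these lines (sketch only):
      *
      *   if (thread) {
      *           srpt_thread.thread = kthread_run(srpt_ioctx_thread, NULL,
      *                                            "srpt_thread");
      *           if (IS_ERR(srpt_thread.thread))
      *                   srpt_thread.thread = NULL;
      *   }
      *
      * and stopped via kthread_stop() on module unload, which makes
      * kthread_should_stop() return true and terminates the loop above.
      */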
2770
2771 /* SCST target template for the SRP target implementation. */
2772 static struct scst_tgt_template srpt_template = {
2773         .name = DRV_NAME,
2774         .sg_tablesize = SRPT_DEF_SG_TABLESIZE,
2775         .xmit_response_atomic = 1,
2776         .rdy_to_xfer_atomic = 1,
2777         .detect = srpt_detect,
2778         .release = srpt_release,
2779         .xmit_response = srpt_xmit_response,
2780         .rdy_to_xfer = srpt_rdy_to_xfer,
2781         .on_free_cmd = srpt_on_free_cmd,
2782         .task_mgmt_fn_done = srpt_tsk_mgmt_done
2783 };
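     /*
      * Note: .xmit_response_atomic and .rdy_to_xfer_atomic tell the SCST
      * core that srpt_xmit_response() and srpt_rdy_to_xfer() may be invoked
      * in atomic context, which is why both callbacks are written such that
      * they never block.
      */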
2784
2785 /*
2786  * The callback function srpt_release_class_dev() is called whenever a
2787  * device is removed from the /sys/class/infiniband_srpt device class.
2788  * Although this function has been left empty, a release function has been
2789  * defined such that upon module removal no complaint is logged about a
2790  * missing release function.
2791  */
2792 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2793 static void srpt_release_class_dev(struct class_device *class_dev)
2794 #else
2795 static void srpt_release_class_dev(struct device *dev)
2796 #endif
2797 {
2798 }
2799
2800 #ifdef CONFIG_SCST_PROC
2801
2802 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
2803 static int srpt_trace_level_show(struct seq_file *seq, void *v)
2804 {
2805         return scst_proc_log_entry_read(seq, trace_flag, NULL);
2806 }
2807
2808 static ssize_t srpt_proc_trace_level_write(struct file *file,
2809         const char __user *buf, size_t length, loff_t *off)
2810 {
2811         return scst_proc_log_entry_write(file, buf, length, &trace_flag,
2812                 DEFAULT_SRPT_TRACE_FLAGS, NULL);
2813 }
2814
2815 static struct scst_proc_data srpt_log_proc_data = {
2816         SCST_DEF_RW_SEQ_OP(srpt_proc_trace_level_write)
2817         .show = srpt_trace_level_show,
2818 };
2819 #endif
2820
2821 #endif /* CONFIG_SCST_PROC */
2822
2823 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2824 static ssize_t show_login_info(struct class_device *class_dev, char *buf)
2825 #else
2826 static ssize_t show_login_info(struct device *dev,
2827                                struct device_attribute *attr, char *buf)
2828 #endif
2829 {
2830         struct srpt_device *sdev =
2831 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2832                 container_of(class_dev, struct srpt_device, class_dev);
2833 #else
2834                 container_of(dev, struct srpt_device, dev);
2835 #endif
2836         struct srpt_port *sport;
2837         int i;
2838         int len = 0;
2839
2840         for (i = 0; i < sdev->device->phys_port_cnt; i++) {
2841                 sport = &sdev->port[i];
2842
2843                 len += sprintf(buf + len,
2844                                "tid_ext=%016llx,ioc_guid=%016llx,pkey=ffff,"
2845                                "dgid=%04x%04x%04x%04x%04x%04x%04x%04x,"
2846                                "service_id=%016llx\n",
2847                                (unsigned long long) srpt_service_guid,
2848                                (unsigned long long) srpt_service_guid,
2849                                be16_to_cpu(((__be16 *) sport->gid.raw)[0]),
2850                                be16_to_cpu(((__be16 *) sport->gid.raw)[1]),
2851                                be16_to_cpu(((__be16 *) sport->gid.raw)[2]),
2852                                be16_to_cpu(((__be16 *) sport->gid.raw)[3]),
2853                                be16_to_cpu(((__be16 *) sport->gid.raw)[4]),
2854                                be16_to_cpu(((__be16 *) sport->gid.raw)[5]),
2855                                be16_to_cpu(((__be16 *) sport->gid.raw)[6]),
2856                                be16_to_cpu(((__be16 *) sport->gid.raw)[7]),
2857                                (unsigned long long) srpt_service_guid);
2858         }
2859
2860         return len;
2861 }
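     /*
      * Example of one line produced by show_login_info() (all values are
      * hypothetical):
      *
      *   tid_ext=0002c90300a1b2c3,ioc_guid=0002c90300a1b2c3,pkey=ffff,
      *   dgid=fe800000000000000002c90300a1b2c4,service_id=0002c90300a1b2c3
      *
      * (a single line per port in the actual output). This is the parameter
      * format accepted by the SRP initiator's add_target interface.
      */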
2862
2863 static struct class_attribute srpt_class_attrs[] = {
2864         __ATTR_NULL,
2865 };
2866
2867 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2868 static struct class_device_attribute srpt_dev_attrs[] = {
2869 #else
2870 static struct device_attribute srpt_dev_attrs[] = {
2871 #endif
2872         __ATTR(login_info, S_IRUGO, show_login_info, NULL),
2873         __ATTR_NULL,
2874 };
2875
2876 static struct class srpt_class = {
2877         .name        = "infiniband_srpt",
2878 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2879         .release = srpt_release_class_dev,
2880 #else
2881         .dev_release = srpt_release_class_dev,
2882 #endif
2883         .class_attrs = srpt_class_attrs,
2884 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2885         .class_dev_attrs = srpt_dev_attrs,
2886 #else
2887         .dev_attrs   = srpt_dev_attrs,
2888 #endif
2889 };
2890
2891 /*
2892  * Callback function called by the InfiniBand core, either when an InfiniBand
2893  * device is added or, during ib_register_client(), once for each InfiniBand
2894  * device that has already been registered.
2895  */
2896 static void srpt_add_one(struct ib_device *device)
2897 {
2898         struct srpt_device *sdev;
2899         struct srpt_port *sport;
2900         struct ib_srq_init_attr srq_attr;
2901         int i;
2902
2903         TRACE_ENTRY();
2904
2905         TRACE_DBG("device = %p, device->dma_ops = %p", device, device->dma_ops);
2906
2907         sdev = kzalloc(sizeof *sdev, GFP_KERNEL);
2908         if (!sdev)
2909                 return;
2910
2911         sdev->device = device;
2912
2913 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2914         sdev->class_dev.class = &srpt_class;
2915         sdev->class_dev.dev = device->dma_device;
2916         snprintf(sdev->class_dev.class_id, BUS_ID_SIZE,
2917                  "srpt-%s", device->name);
2918 #else
2919         sdev->dev.class = &srpt_class;
2920         sdev->dev.parent = device->dma_device;
2921 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30)
2922         snprintf(sdev->dev.bus_id, BUS_ID_SIZE, "srpt-%s", device->name);
2923 #else
2924         dev_set_name(&sdev->dev, "srpt-%s", device->name);
2925 #endif
2926 #endif
2927
2928 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2929         if (class_device_register(&sdev->class_dev))
2930                 goto free_dev;
2931 #else
2932         if (device_register(&sdev->dev))
2933                 goto free_dev;
2934 #endif
2935
2936         if (ib_query_device(device, &sdev->dev_attr))
2937                 goto err_dev;
2938
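        /*
         * Allocate a protection domain and a DMA memory region with local
         * write access; these are needed for posting receive buffers and
         * for RDMA transfers.
         */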
        sdev->pd = ib_alloc_pd(device);
        if (IS_ERR(sdev->pd))
                goto err_dev;

        sdev->mr = ib_get_dma_mr(sdev->pd, IB_ACCESS_LOCAL_WRITE);
        if (IS_ERR(sdev->mr))
                goto err_pd;

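        /*
         * Create a shared receive queue (SRQ) so that a single pool of
         * receive buffers can serve all RDMA channels. Limit the number of
         * outstanding work requests to what the HCA supports.
         */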
        srq_attr.event_handler = srpt_srq_event;
        srq_attr.srq_context = (void *)sdev;
        srq_attr.attr.max_wr = min(SRPT_SRQ_SIZE, sdev->dev_attr.max_srq_wr);
        srq_attr.attr.max_sge = 1;
        srq_attr.attr.srq_limit = 0;

        sdev->srq = ib_create_srq(sdev->pd, &srq_attr);
        if (IS_ERR(sdev->srq))
                goto err_mr;

        TRACE_DBG("%s: create SRQ #wr=%d max_allow=%d dev=%s",
                  __func__, srq_attr.attr.max_wr,
                  sdev->dev_attr.max_srq_wr, device->name);

        if (!srpt_service_guid)
                srpt_service_guid = be64_to_cpu(device->node_guid);

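        /* Create a CM ID for receiving SRP connection (login) requests. */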
        sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev);
        if (IS_ERR(sdev->cm_id))
                goto err_srq;

        /* Print out the target login information. */
        TRACE_DBG("Target login info: id_ext=%016llx,"
                  "ioc_guid=%016llx,pkey=ffff,service_id=%016llx",
                  (unsigned long long) srpt_service_guid,
                  (unsigned long long) srpt_service_guid,
                  (unsigned long long) srpt_service_guid);

        /*
         * We do not have a consistent service_id (i.e. the id_ext of the
         * target_id) to identify this target. We currently use the GUID of
         * the first HCA in the system as service_id; hence the target_id
         * will change if this HCA fails and is replaced by a different HCA.
         */
        if (ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid), 0, NULL))
                goto err_cm;

        INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device,
                              srpt_event_handler);
        if (ib_register_event_handler(&sdev->event_handler))
                goto err_cm;

        if (srpt_alloc_ioctx_ring(sdev))
                goto err_event;

        INIT_LIST_HEAD(&sdev->rch_list);
        spin_lock_init(&sdev->spinlock);

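        /* Pre-post a receive request on the SRQ for every I/O context. */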
        for (i = 0; i < SRPT_SRQ_SIZE; ++i)
                srpt_post_recv(sdev, sdev->ioctx_ring[i]);

        ib_set_client_data(device, &srpt_client, sdev);

        sdev->scst_tgt = scst_register(&srpt_template, NULL);
        if (!sdev->scst_tgt) {
                PRINT_ERROR("SCST registration failed for %s.",
                            sdev->device->name);
                goto err_ring;
        }

        scst_tgt_set_tgt_priv(sdev->scst_tgt, sdev);

        WARN_ON(sdev->device->phys_port_cnt > ARRAY_SIZE(sdev->port));

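        /* InfiniBand port numbers are one-based, hence the loop bounds. */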
        for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
                sport = &sdev->port[i - 1];
                sport->sdev = sdev;
                sport->port = i;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && !defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
                /*
                 * A vanilla 2.6.19 or older kernel without backported OFED
                 * kernel headers.
                 */
                INIT_WORK(&sport->work, srpt_refresh_port_work, sport);
#else
                INIT_WORK(&sport->work, srpt_refresh_port_work);
#endif
                if (srpt_refresh_port(sport)) {
                        PRINT_ERROR("MAD registration failed for %s-%d.",
                                    sdev->device->name, i);
                        goto err_refresh_port;
                }
        }

        atomic_inc(&srpt_device_count);

        TRACE_EXIT();

        return;

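/* Error unwind: undo the initialization steps above in reverse order. */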
err_refresh_port:
        scst_unregister(sdev->scst_tgt);
err_ring:
        ib_set_client_data(device, &srpt_client, NULL);
        srpt_free_ioctx_ring(sdev);
err_event:
        ib_unregister_event_handler(&sdev->event_handler);
err_cm:
        ib_destroy_cm_id(sdev->cm_id);
err_srq:
        ib_destroy_srq(sdev->srq);
err_mr:
        ib_dereg_mr(sdev->mr);
err_pd:
        ib_dealloc_pd(sdev->pd);
err_dev:
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
        class_device_unregister(&sdev->class_dev);
#else
        device_unregister(&sdev->dev);
#endif
free_dev:
        kfree(sdev);

        TRACE_EXIT();
}

/*
 * Callback function invoked by the InfiniBand core when an InfiniBand device
 * has been removed, and also by ib_unregister_client() for each InfiniBand
 * device that is still registered at unregistration time.
 */
static void srpt_remove_one(struct ib_device *device)
{
        int i;
        struct srpt_device *sdev;

        TRACE_ENTRY();

        sdev = ib_get_client_data(device, &srpt_client);
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
        WARN_ON(!sdev);
        if (!sdev)
                return;
#else
        if (WARN_ON(!sdev))
                return;
#endif

        /*
         * Cancel the work if it is queued. Wait until srpt_refresh_port_work()
         * has finished if it is running.
         */
        for (i = 0; i < sdev->device->phys_port_cnt; i++)
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 22)
                cancel_work_sync(&sdev->port[i].work);
#else
                /*
                 * cancel_work_sync() was introduced in kernel 2.6.22. Older
                 * kernels do not have a facility to cancel scheduled work.
                 */
                PRINT_ERROR("%s",
                            "your kernel does not provide cancel_work_sync().");
#endif

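        /*
         * Unregister from SCST before tearing down the IB resources so that
         * no new commands can arrive while the channel infrastructure is
         * being destroyed.
         */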
        scst_unregister(sdev->scst_tgt);
        sdev->scst_tgt = NULL;

        ib_unregister_event_handler(&sdev->event_handler);
        ib_destroy_cm_id(sdev->cm_id);
        ib_destroy_srq(sdev->srq);
        ib_dereg_mr(sdev->mr);
        ib_dealloc_pd(sdev->pd);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
        class_device_unregister(&sdev->class_dev);
#else
        device_unregister(&sdev->dev);
#endif

        srpt_free_ioctx_ring(sdev);
        kfree(sdev);

        TRACE_EXIT();
}

#ifdef CONFIG_SCST_PROC

/**
 * srpt_register_procfs_entry() - Create procfs entries for srpt.
 *
 * Currently the only procfs entry created by this function is the
 * "trace_level" entry.
 */
static int srpt_register_procfs_entry(struct scst_tgt_template *tgt)
{
        int res = 0;
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
        struct proc_dir_entry *p, *root;

        root = scst_proc_get_tgt_root(tgt);
        WARN_ON(!root);
        if (root) {
                /*
                 * Fill in the scst_proc_data::data pointer, which is used in
                 * a printk(KERN_INFO ...) statement in
                 * scst_proc_log_entry_write() in scst_proc.c.
                 */
                srpt_log_proc_data.data = (char *)tgt->name;
                p = scst_create_proc_entry(root, SRPT_PROC_TRACE_LEVEL_NAME,
                                           &srpt_log_proc_data);
                if (!p)
                        res = -ENOMEM;
        } else {
                res = -ENOMEM;
        }

#endif
        return res;
}

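/* Remove the procfs entries created by srpt_register_procfs_entry(). */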
static void srpt_unregister_procfs_entry(struct scst_tgt_template *tgt)
{
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
        struct proc_dir_entry *root;

        root = scst_proc_get_tgt_root(tgt);
        WARN_ON(!root);
        if (root)
                remove_proc_entry(SRPT_PROC_TRACE_LEVEL_NAME, root);
#endif
}

#endif /* CONFIG_SCST_PROC */

/*
 * Module initialization.
 *
 * Note: since ib_register_client() registers callback functions, and since at
 * least one of these callback functions (srpt_add_one()) calls SCST functions,
 * the SCST target template must be registered before ib_register_client() is
 * called.
 */
static int __init srpt_init_module(void)
{
        int ret;

        ret = -EINVAL;
        if (srp_max_message_size < MIN_MAX_MESSAGE_SIZE) {
                PRINT_ERROR("invalid value %d for kernel module parameter"
                            " srp_max_message_size -- must be at least %d.",
                            srp_max_message_size,
                            MIN_MAX_MESSAGE_SIZE);
                goto out;
        }

        ret = class_register(&srpt_class);
        if (ret) {
                PRINT_ERROR("%s", "couldn't register class infiniband_srpt");
                goto out;
        }

        ret = scst_register_target_template(&srpt_template);
        if (ret < 0) {
                PRINT_ERROR("%s", "couldn't register with scst");
                ret = -ENODEV;
                goto out_unregister_class;
        }

#ifdef CONFIG_SCST_PROC
        ret = srpt_register_procfs_entry(&srpt_template);
        if (ret) {
                PRINT_ERROR("%s", "couldn't register procfs entry");
                goto out_unregister_target;
        }
#endif /* CONFIG_SCST_PROC */

        ret = ib_register_client(&srpt_client);
        if (ret) {
                PRINT_ERROR("%s", "couldn't register IB client");
                goto out_unregister_target;
        }

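        /*
         * If the "thread" module parameter is set, process I/O contexts in a
         * dedicated kernel thread. If starting that thread fails, clear the
         * flag so that the driver falls back to non-threaded processing.
         */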
        if (thread) {
                spin_lock_init(&srpt_thread.thread_lock);
                INIT_LIST_HEAD(&srpt_thread.thread_ioctx_list);
                srpt_thread.thread = kthread_run(srpt_ioctx_thread,
                                                 NULL, "srpt_thread");
                if (IS_ERR(srpt_thread.thread)) {
                        srpt_thread.thread = NULL;
                        thread = 0;
                }
        }

        return 0;

out_unregister_target:
#ifdef CONFIG_SCST_PROC
        /*
         * Note: the procfs entry is unregistered in srpt_release(), which is
         * called by scst_unregister_target_template().
         */
#endif /* CONFIG_SCST_PROC */
        scst_unregister_target_template(&srpt_template);
out_unregister_class:
        class_unregister(&srpt_class);
out:
        return ret;
}

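/*
 * Module cleanup: stop the I/O context thread, if any, and undo the
 * registrations performed by srpt_init_module() in reverse order.
 */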
static void __exit srpt_cleanup_module(void)
{
        TRACE_ENTRY();

        if (srpt_thread.thread)
                kthread_stop(srpt_thread.thread);
        ib_unregister_client(&srpt_client);
        scst_unregister_target_template(&srpt_template);
        class_unregister(&srpt_class);

        TRACE_EXIT();
}

module_init(srpt_init_module);
module_exit(srpt_cleanup_module);