Bug fixes (some introduced in the previous revision, some long-standing):
srpt/src/ib_srpt.c:
/*
 * Copyright (c) 2006 - 2009 Mellanox Technology Inc.  All rights reserved.
 * Copyright (C) 2008 Vladislav Bolkhovitin <vst@vlnb.net>
 * Copyright (C) 2008 - 2009 Bart Van Assche <bart.vanassche@gmail.com>
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#include <asm/atomic.h>
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#endif
#include "ib_srpt.h"
#include "scst_debug.h"

#define CONFIG_SCST_PROC

/* Name of this kernel module. */
#define DRV_NAME                "ib_srpt"
/* Prefix for printk() kernel messages. */
#define LOG_PFX                 DRV_NAME ": "
#define DRV_VERSION             "1.0.1"
#define DRV_RELDATE             "July 10, 2008"
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
/* Flags to be used in SCST debug tracing statements. */
#define DEFAULT_SRPT_TRACE_FLAGS (TRACE_OUT_OF_MEM | TRACE_MINOR \
                                  | TRACE_MGMT | TRACE_SPECIAL)
/* Name of the entry that will be created under /proc/scsi_tgt/ib_srpt. */
#define SRPT_PROC_TRACE_LEVEL_NAME      "trace_level"
#endif

#define MELLANOX_SRPT_ID_STRING "SCST SRP target"

MODULE_AUTHOR("Vu Pham");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol target "
                   "v" DRV_VERSION " (" DRV_RELDATE ")");
MODULE_LICENSE("Dual BSD/GPL");

struct srpt_thread {
        /* Protects thread_ioctx_list. */
        spinlock_t thread_lock;
        /* I/O contexts to be processed by the kernel thread. */
        struct list_head thread_ioctx_list;
        /* SRPT kernel thread. */
        struct task_struct *thread;
};

/*
 * Global Variables
 */

static u64 srpt_service_guid;
/* Number of srpt_device structures. */
static atomic_t srpt_device_count;
static int use_port_guid_in_session_name;
static int thread = 1;
static struct srpt_thread srpt_thread;
static DECLARE_WAIT_QUEUE_HEAD(ioctx_list_waitQ);
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
static unsigned long trace_flag = DEFAULT_SRPT_TRACE_FLAGS;
module_param(trace_flag, long, 0644);
MODULE_PARM_DESC(trace_flag,
                 "Trace flags for the ib_srpt kernel module.");
#endif
#if defined(CONFIG_SCST_DEBUG)
static unsigned long interrupt_processing_delay_in_us;
module_param(interrupt_processing_delay_in_us, long, 0744);
MODULE_PARM_DESC(interrupt_processing_delay_in_us,
                 "CQ completion handler interrupt delay in microseconds.");
static unsigned long thread_processing_delay_in_us;
module_param(thread_processing_delay_in_us, long, 0744);
MODULE_PARM_DESC(thread_processing_delay_in_us,
                 "SRP thread processing delay in microseconds.");
#endif

module_param(thread, int, 0444);
MODULE_PARM_DESC(thread,
                 "Execute ioctx in thread context. Default 1; if 0, process"
                 " completions in soft IRQ context where possible.");

static unsigned int srp_max_rdma_size = 65536;
module_param(srp_max_rdma_size, int, 0744);
MODULE_PARM_DESC(srp_max_rdma_size,
                 "Maximum size of SRP RDMA transfers for new connections.");

static unsigned int srp_max_message_size = 4096;
module_param(srp_max_message_size, int, 0444);
MODULE_PARM_DESC(srp_max_message_size,
                 "Maximum size of SRP control messages in bytes.");

module_param(use_port_guid_in_session_name, bool, 0444);
MODULE_PARM_DESC(use_port_guid_in_session_name,
                 "Use target port ID in the SCST session name such that"
                 " redundant paths between multiport systems can be masked.");

static void srpt_add_one(struct ib_device *device);
static void srpt_remove_one(struct ib_device *device);
static void srpt_unregister_mad_agent(struct srpt_device *sdev);
#ifdef CONFIG_SCST_PROC
static void srpt_unregister_procfs_entry(struct scst_tgt_template *tgt);
#endif /*CONFIG_SCST_PROC*/
static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
                                    struct srpt_ioctx *ioctx);
static void srpt_release_channel(struct scst_session *scst_sess);

static struct ib_client srpt_client = {
        .name = DRV_NAME,
        .add = srpt_add_one,
        .remove = srpt_remove_one
};

/**
 * Atomically test and set the channel state.
 * @ch: RDMA channel.
 * @old: channel state to compare with.
 * @new: state to change the channel state to if the current state matches the
 *       argument 'old'.
 *
 * Returns the previous channel state.
 */
static enum rdma_ch_state
srpt_test_and_set_channel_state(struct srpt_rdma_ch *ch,
                                enum rdma_ch_state old,
                                enum rdma_ch_state new)
{
        return atomic_cmpxchg(&ch->state, old, new);
}
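
/*
 * Illustrative usage note (not in the original source): atomic_cmpxchg()
 * returns the value observed before the exchange, so a caller can tell
 * whether its transition took effect by comparing the return value with
 * 'old', e.g.:
 *
 *   if (srpt_test_and_set_channel_state(ch, RDMA_CHANNEL_LIVE,
 *           RDMA_CHANNEL_DISCONNECTING) == RDMA_CHANNEL_LIVE)
 *           ... this thread won the LIVE -> DISCONNECTING transition ...
 */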

/*
 * Callback function called by the InfiniBand core when an asynchronous IB
 * event occurs. This callback may occur in interrupt context. See also
 * section 11.5.2, Set Asynchronous Event Handler in the InfiniBand
 * Architecture Specification.
 */
static void srpt_event_handler(struct ib_event_handler *handler,
                               struct ib_event *event)
{
        struct srpt_device *sdev;
        struct srpt_port *sport;

        TRACE_ENTRY();

        sdev = ib_get_client_data(event->device, &srpt_client);
        if (!sdev || sdev->device != event->device)
                return;

        TRACE_DBG("ASYNC event= %d on device= %s",
                  event->event, sdev->device->name);

        switch (event->event) {
        case IB_EVENT_PORT_ERR:
                if (event->element.port_num <= sdev->device->phys_port_cnt) {
                        sport = &sdev->port[event->element.port_num - 1];
                        sport->lid = 0;
                        sport->sm_lid = 0;
                }
                break;
        case IB_EVENT_PORT_ACTIVE:
        case IB_EVENT_LID_CHANGE:
        case IB_EVENT_PKEY_CHANGE:
        case IB_EVENT_SM_CHANGE:
        case IB_EVENT_CLIENT_REREGISTER:
                /*
                 * Refresh port data asynchronously. Note: it is safe to call
                 * schedule_work() even if &sport->work is already on the
                 * global workqueue because schedule_work() tests for the
                 * work_pending() condition before adding &sport->work to the
                 * global work queue.
                 */
                if (event->element.port_num <= sdev->device->phys_port_cnt) {
                        sport = &sdev->port[event->element.port_num - 1];
                        if (!sport->lid && !sport->sm_lid)
                                schedule_work(&sport->work);
                }
                break;
        default:
                break;
        }

        TRACE_EXIT();
}

/*
 * Callback function called by the InfiniBand core for SRQ (shared receive
 * queue) events.
 */
static void srpt_srq_event(struct ib_event *event, void *ctx)
{
        TRACE_ENTRY();

        TRACE_DBG("SRQ event %d", event->event);

        TRACE_EXIT();
}

/*
 * Callback function called by the InfiniBand core for QP (queue pair) events.
 */
static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch)
{
        TRACE_DBG("QP event %d on cm_id=%p sess_name=%s state=%d",
                  event->event, ch->cm_id, ch->sess_name,
                  atomic_read(&ch->state));

        switch (event->event) {
        case IB_EVENT_COMM_EST:
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20) || defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
                ib_cm_notify(ch->cm_id, event->event);
#else
                /* Vanilla 2.6.19 kernel (or before) without OFED. */
                PRINT_ERROR("%s", "how to perform ib_cm_notify() on a"
                            " vanilla 2.6.19 kernel ???");
#endif
                break;
        case IB_EVENT_QP_LAST_WQE_REACHED:
                if (srpt_test_and_set_channel_state(ch, RDMA_CHANNEL_LIVE,
                        RDMA_CHANNEL_DISCONNECTING) == RDMA_CHANNEL_LIVE) {
                        PRINT_INFO("disconnected session %s.", ch->sess_name);
                        ib_send_cm_dreq(ch->cm_id, NULL, 0);
                }
                break;
        default:
                break;
        }
}
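
/*
 * Note on the LAST_WQE_REACHED case above (explanatory, not in the original
 * source): the atomic LIVE -> DISCONNECTING transition guarantees that
 * ib_send_cm_dreq() is invoked at most once per channel even if several
 * IB_EVENT_QP_LAST_WQE_REACHED events race with each other.
 */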

/*
 * Helper function for filling in an InfiniBand IOUnitInfo structure. Copies
 * the lowest four bits of 'value' into element 'slot' of c_list (the
 * controller list), an array of four-bit elements. The slot index is
 * one-based.
 *
 * @pre 1 <= slot && 0 <= value && value < 16
 */
static void srpt_set_ioc(u8 *c_list, u32 slot, u8 value)
{
        u16 id;
        u8 tmp;

        id = (slot - 1) / 2;
        if (slot & 0x1) {
                tmp = c_list[id] & 0xf;
                c_list[id] = (value << 4) | tmp;
        } else {
                tmp = c_list[id] & 0xf0;
                c_list[id] = (value & 0xf) | tmp;
        }
}
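
/*
 * Worked example (illustrative, not in the original source): two four-bit
 * controller-list entries share each byte, odd slots in the upper nibble and
 * even slots in the lower nibble. For slot = 3 and value = 5, id equals
 * (3 - 1) / 2 = 1 and slot is odd, so c_list[1] becomes (5 << 4) combined
 * with its old low nibble; the even slot stored in the same byte is left
 * untouched.
 */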

/*
 * Write InfiniBand ClassPortInfo to mad. See also section 16.3.3.1
 * ClassPortInfo in the InfiniBand Architecture Specification.
 */
static void srpt_get_class_port_info(struct ib_dm_mad *mad)
{
        struct ib_class_port_info *cif;

        cif = (struct ib_class_port_info *)mad->data;
        memset(cif, 0, sizeof *cif);
        cif->base_version = 1;
        cif->class_version = 1;
        cif->resp_time_value = 20;

        mad->mad_hdr.status = 0;
}

/*
 * Write IOUnitInfo to mad. See also section 16.3.3.3 IOUnitInfo in the
 * InfiniBand Architecture Specification. See also section B.7,
 * table B.6 in the T10 SRP r16a document.
 */
static void srpt_get_iou(struct ib_dm_mad *mad)
{
        struct ib_dm_iou_info *ioui;
        u8 slot;
        int i;

        ioui = (struct ib_dm_iou_info *)mad->data;
        ioui->change_id = 1;
        ioui->max_controllers = 16;

        /* set present for slot 1 and empty for the rest */
        srpt_set_ioc(ioui->controller_list, 1, 1);
        for (i = 1, slot = 2; i < 16; i++, slot++)
                srpt_set_ioc(ioui->controller_list, slot, 0);

        mad->mad_hdr.status = 0;
}

/*
 * Write IOControllerProfile to mad for I/O controller (sdev, slot). See also
 * section 16.3.3.4 IOControllerProfile in the InfiniBand Architecture
 * Specification. See also section B.7, table B.7 in the T10 SRP r16a
 * document.
 */
static void srpt_get_ioc(struct srpt_device *sdev, u32 slot,
                         struct ib_dm_mad *mad)
{
        struct ib_dm_ioc_profile *iocp;

        iocp = (struct ib_dm_ioc_profile *)mad->data;

        if (!slot || slot > 16) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
                return;
        }

        if (slot > 2) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
                return;
        }

        memset(iocp, 0, sizeof *iocp);
        strcpy(iocp->id_string, MELLANOX_SRPT_ID_STRING);
        iocp->guid = cpu_to_be64(srpt_service_guid);
        iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
        iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id);
        iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver);
        iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
        iocp->subsys_device_id = 0x0;
        iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
        iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
        iocp->protocol = cpu_to_be16(SRP_PROTOCOL);
        iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION);
        iocp->send_queue_depth = cpu_to_be16(SRPT_SRQ_SIZE);
        iocp->rdma_read_depth = 4;
        iocp->send_size = cpu_to_be32(srp_max_message_size);
        iocp->rdma_size = cpu_to_be32(min(max(srp_max_rdma_size, 256U),
                                          1U << 24));
        iocp->num_svc_entries = 1;
        iocp->op_cap_mask = SRP_SEND_TO_IOC | SRP_SEND_FROM_IOC |
                SRP_RDMA_READ_FROM_IOC | SRP_RDMA_WRITE_FROM_IOC;

        mad->mad_hdr.status = 0;
}

/*
 * Device management: write ServiceEntries to mad for the given slot. See also
 * section 16.3.3.5 ServiceEntries in the InfiniBand Architecture
 * Specification. See also section B.7, table B.8 in the T10 SRP r16a document.
 */
static void srpt_get_svc_entries(u64 ioc_guid,
                                 u16 slot, u8 hi, u8 lo, struct ib_dm_mad *mad)
{
        struct ib_dm_svc_entries *svc_entries;

        WARN_ON(!ioc_guid);

        if (!slot || slot > 16) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
                return;
        }

        if (slot > 2 || lo > hi || hi > 1) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
                return;
        }

        svc_entries = (struct ib_dm_svc_entries *)mad->data;
        memset(svc_entries, 0, sizeof *svc_entries);
        svc_entries->service_entries[0].id = cpu_to_be64(ioc_guid);
        snprintf(svc_entries->service_entries[0].name,
                 sizeof(svc_entries->service_entries[0].name),
                 "%s%016llx",
                 SRP_SERVICE_NAME_PREFIX,
                 (unsigned long long)ioc_guid);

        mad->mad_hdr.status = 0;
}

/*
 * Actual processing of a MAD *rq_mad received through source port *sp
 * (MAD = InfiniBand management datagram). The response to be sent back is
 * written to *rsp_mad.
 */
static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
                                 struct ib_dm_mad *rsp_mad)
{
        u16 attr_id;
        u32 slot;
        u8 hi, lo;

        attr_id = be16_to_cpu(rq_mad->mad_hdr.attr_id);
        switch (attr_id) {
        case DM_ATTR_CLASS_PORT_INFO:
                srpt_get_class_port_info(rsp_mad);
                break;
        case DM_ATTR_IOU_INFO:
                srpt_get_iou(rsp_mad);
                break;
        case DM_ATTR_IOC_PROFILE:
                slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
                srpt_get_ioc(sp->sdev, slot, rsp_mad);
                break;
        case DM_ATTR_SVC_ENTRIES:
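                /*
                 * Layout note (explanatory, not in the original source): for
                 * ServiceEntries the 32-bit attribute modifier packs the slot
                 * number in bits 31:16, the highest requested entry in bits
                 * 15:8 and the lowest requested entry in bits 7:0, which is
                 * what the shifts below unpack.
                 */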
                slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
                hi = (u8) ((slot >> 8) & 0xff);
                lo = (u8) (slot & 0xff);
                slot = (u16) ((slot >> 16) & 0xffff);
                srpt_get_svc_entries(srpt_service_guid,
                                     slot, hi, lo, rsp_mad);
                break;
        default:
                rsp_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
                break;
        }
}

/*
 * Callback function that is called by the InfiniBand core after transmission
 * of a MAD. (MAD = management datagram; AH = address handle.)
 */
static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
                                  struct ib_mad_send_wc *mad_wc)
{
        ib_destroy_ah(mad_wc->send_buf->ah);
        ib_free_send_mad(mad_wc->send_buf);
}

/*
 * Callback function that is called by the InfiniBand core after reception of
 * a MAD (management datagram).
 */
static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
                                  struct ib_mad_recv_wc *mad_wc)
{
        struct srpt_port *sport = (struct srpt_port *)mad_agent->context;
        struct ib_ah *ah;
        struct ib_mad_send_buf *rsp;
        struct ib_dm_mad *dm_mad;

        if (!mad_wc || !mad_wc->recv_buf.mad)
                return;

        ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc,
                                  mad_wc->recv_buf.grh, mad_agent->port_num);
        if (IS_ERR(ah))
                goto err;

        BUILD_BUG_ON(offsetof(struct ib_dm_mad, data) != IB_MGMT_DEVICE_HDR);

        rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp,
                                 mad_wc->wc->pkey_index, 0,
                                 IB_MGMT_DEVICE_HDR, IB_MGMT_DEVICE_DATA,
                                 GFP_KERNEL);
        if (IS_ERR(rsp))
                goto err_rsp;

        rsp->ah = ah;

        dm_mad = rsp->mad;
        memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof *dm_mad);
        dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
        dm_mad->mad_hdr.status = 0;

        switch (mad_wc->recv_buf.mad->mad_hdr.method) {
        case IB_MGMT_METHOD_GET:
                srpt_mgmt_method_get(sport, mad_wc->recv_buf.mad, dm_mad);
                break;
        case IB_MGMT_METHOD_SET:
                dm_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
                break;
        default:
                dm_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD);
                break;
        }

        if (!ib_post_send_mad(rsp, NULL)) {
                ib_free_recv_mad(mad_wc);
                /* will destroy_ah & free_send_mad in send completion */
                return;
        }

        ib_free_send_mad(rsp);

err_rsp:
        ib_destroy_ah(ah);
err:
        ib_free_recv_mad(mad_wc);
}

/*
 * Enable InfiniBand management datagram processing, update the cached sm_lid,
 * lid and gid values, and register a callback function for processing MADs
 * on the specified port. It is safe to call this function more than once for
 * the same port.
 */
static int srpt_refresh_port(struct srpt_port *sport)
{
        struct ib_mad_reg_req reg_req;
        struct ib_port_modify port_modify;
        struct ib_port_attr port_attr;
        int ret;

        TRACE_ENTRY();

        memset(&port_modify, 0, sizeof port_modify);
        port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
        port_modify.clr_port_cap_mask = 0;

        ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
        if (ret)
                goto err_mod_port;

        ret = ib_query_port(sport->sdev->device, sport->port, &port_attr);
        if (ret)
                goto err_query_port;

        sport->sm_lid = port_attr.sm_lid;
        sport->lid = port_attr.lid;

        ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid);
        if (ret)
                goto err_query_port;

        if (!sport->mad_agent) {
                memset(&reg_req, 0, sizeof reg_req);
                reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
                reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION;
                set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
                set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);

                sport->mad_agent = ib_register_mad_agent(sport->sdev->device,
                                                         sport->port,
                                                         IB_QPT_GSI,
                                                         &reg_req, 0,
                                                         srpt_mad_send_handler,
                                                         srpt_mad_recv_handler,
                                                         sport);
                if (IS_ERR(sport->mad_agent)) {
                        ret = PTR_ERR(sport->mad_agent);
                        sport->mad_agent = NULL;
                        goto err_query_port;
                }
        }

        TRACE_EXIT_RES(0);

        return 0;

err_query_port:

        port_modify.set_port_cap_mask = 0;
        port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
        ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);

err_mod_port:

        TRACE_EXIT_RES(ret);

        return ret;
}

/*
 * Unregister the callback function for processing MADs and disable MAD
 * processing for all ports of the specified device. It is safe to call this
 * function more than once for the same device.
 */
static void srpt_unregister_mad_agent(struct srpt_device *sdev)
{
        struct ib_port_modify port_modify = {
                .clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP,
        };
        struct srpt_port *sport;
        int i;

        for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
                sport = &sdev->port[i - 1];
                WARN_ON(sport->port != i);
                if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0)
                        PRINT_ERROR("%s", "disabling MAD processing failed.");
                if (sport->mad_agent) {
                        ib_unregister_mad_agent(sport->mad_agent);
                        sport->mad_agent = NULL;
                }
        }
}

/**
 * Allocate and initialize an SRPT I/O context structure.
 */
static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev)
{
        struct srpt_ioctx *ioctx;

        ioctx = kmalloc(sizeof *ioctx, GFP_KERNEL);
        if (!ioctx)
                goto out;

        ioctx->buf = kzalloc(srp_max_message_size, GFP_KERNEL);
        if (!ioctx->buf)
                goto out_free_ioctx;

        ioctx->dma = ib_dma_map_single(sdev->device, ioctx->buf,
                                       srp_max_message_size, DMA_BIDIRECTIONAL);
        if (ib_dma_mapping_error(sdev->device, ioctx->dma))
                goto out_free_buf;

        return ioctx;

out_free_buf:
        kfree(ioctx->buf);
out_free_ioctx:
        kfree(ioctx);
out:
        return NULL;
}

/*
 * Deallocate an SRPT I/O context structure.
 */
static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
{
        if (!ioctx)
                return;

        ib_dma_unmap_single(sdev->device, ioctx->dma,
                            srp_max_message_size, DMA_BIDIRECTIONAL);
        kfree(ioctx->buf);
        kfree(ioctx);
}

/*
 * Associate a ring of SRPT I/O context structures with the specified device.
 */
static int srpt_alloc_ioctx_ring(struct srpt_device *sdev)
{
        int i;

        TRACE_ENTRY();

        for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
                sdev->ioctx_ring[i] = srpt_alloc_ioctx(sdev);

                if (!sdev->ioctx_ring[i])
                        goto err;

                sdev->ioctx_ring[i]->index = i;
        }

        TRACE_EXIT_RES(0);

        return 0;

err:
        while (--i >= 0) {
                srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
                sdev->ioctx_ring[i] = NULL;
        }
        TRACE_EXIT_RES(-ENOMEM);
        return -ENOMEM;
}

/* Free the ring of SRPT I/O context structures. */
static void srpt_free_ioctx_ring(struct srpt_device *sdev)
{
        int i;

        for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
                srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
                sdev->ioctx_ring[i] = NULL;
        }
}

/**
 * Set the state of a command.
 * @new: New state to be set.
 *
 * Does not modify the state of aborted commands. Returns the previous command
 * state.
 */
static enum srpt_command_state srpt_set_cmd_state(struct srpt_ioctx *ioctx,
                                                  enum srpt_command_state new)
{
        enum srpt_command_state previous;

        WARN_ON(!ioctx);
        WARN_ON(new == SRPT_STATE_NEW);

        do
                previous = atomic_read(&ioctx->state);
        while (previous != SRPT_STATE_ABORTED
               && atomic_cmpxchg(&ioctx->state, previous, new) != previous);

        return previous;
}
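
/*
 * Explanatory note (not in the original source): the loop above makes
 * SRPT_STATE_ABORTED "sticky". Once a command has been marked aborted, no
 * later srpt_set_cmd_state() call can overwrite that state, and a caller can
 * detect this because the returned previous state equals SRPT_STATE_ABORTED.
 */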

/**
 * Test and set the state of a command.
 * @old: State to compare against.
 * @new: New state to be set if the current state matches 'old'.
 *
 * Returns the previous command state.
 */
static enum srpt_command_state
srpt_test_and_set_cmd_state(struct srpt_ioctx *ioctx,
                            enum srpt_command_state old,
                            enum srpt_command_state new)
{
        WARN_ON(!ioctx);
        WARN_ON(old == SRPT_STATE_ABORTED);
        WARN_ON(new == SRPT_STATE_NEW);

        return atomic_cmpxchg(&ioctx->state, old, new);
}

/**
 * Post a receive request on the work queue of InfiniBand device 'sdev'.
 */
static int srpt_post_recv(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
{
        struct ib_sge list;
        struct ib_recv_wr wr, *bad_wr;

        wr.wr_id = ioctx->index | SRPT_OP_RECV;

        list.addr = ioctx->dma;
        list.length = srp_max_message_size;
        list.lkey = sdev->mr->lkey;

        wr.next = NULL;
        wr.sg_list = &list;
        wr.num_sge = 1;

        return ib_post_srq_recv(sdev->srq, &wr, &bad_wr);
}
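
/*
 * Explanatory note (not in the original source): OR-ing SRPT_OP_RECV into
 * wr_id tags receive work requests so that completion handlers such as
 * srpt_completion() and srpt_handle_err_comp() can distinguish them from
 * send completions via "wc.wr_id & SRPT_OP_RECV" and recover the ring index
 * with "wc.wr_id & ~SRPT_OP_RECV".
 */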

/**
 * Post an IB send request.
 * @ch: RDMA channel to post the send request on.
 * @ioctx: I/O context of the send request.
 * @len: length of the request to be sent in bytes.
 *
 * Returns zero upon success and a non-zero value upon failure.
 */
static int srpt_post_send(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
                          int len)
{
        struct ib_sge list;
        struct ib_send_wr wr, *bad_wr;
        struct srpt_device *sdev = ch->sport->sdev;
        int ret;

        ret = -ENOMEM;
        if (atomic_dec_return(&ch->qp_wr_avail) < 0) {
                atomic_inc(&ch->qp_wr_avail);
                PRINT_ERROR("%s[%d]: SRQ full", __func__, __LINE__);
                goto out;
        }

        ib_dma_sync_single_for_device(sdev->device, ioctx->dma,
                                      len, DMA_TO_DEVICE);

        list.addr = ioctx->dma;
        list.length = len;
        list.lkey = sdev->mr->lkey;

        wr.next = NULL;
        wr.wr_id = ioctx->index;
        wr.sg_list = &list;
        wr.num_sge = 1;
        wr.opcode = IB_WR_SEND;
        wr.send_flags = IB_SEND_SIGNALED;

        ret = ib_post_send(ch->qp, &wr, &bad_wr);

out:
        return ret;
}

static int srpt_get_desc_tbl(struct srpt_ioctx *ioctx, struct srp_cmd *srp_cmd,
                             int *ind)
{
        struct srp_indirect_buf *idb;
        struct srp_direct_buf *db;

        *ind = 0;
        if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) ||
            ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) {
                ioctx->n_rbuf = 1;
                ioctx->rbufs = &ioctx->single_rbuf;

                ib_dma_sync_single_for_cpu(ioctx->ch->sport->sdev->device,
                                           ioctx->dma + sizeof(struct srp_cmd),
                                           sizeof(*db), DMA_FROM_DEVICE);

                db = (void *)srp_cmd->add_data;
                memcpy(ioctx->rbufs, db, sizeof *db);
                ioctx->data_len = be32_to_cpu(db->len);
        } else {
                idb = (void *)srp_cmd->add_data;

                ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof *db;

                if (ioctx->n_rbuf >
                    (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
                        *ind = 1;
                        ioctx->n_rbuf = 0;
                        goto out;
                }

                if (ioctx->n_rbuf == 1)
                        ioctx->rbufs = &ioctx->single_rbuf;
                else
                        ioctx->rbufs =
                                kmalloc(ioctx->n_rbuf * sizeof *db, GFP_ATOMIC);
                if (!ioctx->rbufs) {
                        ioctx->n_rbuf = 0;
                        return -ENOMEM;
                }

                ib_dma_sync_single_for_cpu(ioctx->ch->sport->sdev->device,
                                           ioctx->dma + sizeof(struct srp_cmd),
                                           ioctx->n_rbuf * sizeof(*db),
                                           DMA_FROM_DEVICE);

                db = idb->desc_list;
                memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof *db);
                ioctx->data_len = be32_to_cpu(idb->len);
        }
out:
        return 0;
}
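
/*
 * Explanatory note (not in the original source): per the SRP buffer format
 * convention also documented in srpt_handle_cmd(), the lower four bits of
 * srp_cmd->buf_fmt describe the DATA-IN descriptor format and the upper
 * four bits the DATA-OUT descriptor format; SRP_DATA_DESC_DIRECT in either
 * nibble selects the single-descriptor fast path above.
 */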

/*
 * Modify the attributes of queue pair 'qp': allow local write, remote read,
 * and remote write. Also transition 'qp' to state IB_QPS_INIT.
 */
static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
{
        struct ib_qp_attr *attr;
        int ret;

        attr = kzalloc(sizeof *attr, GFP_KERNEL);
        if (!attr)
                return -ENOMEM;

        attr->qp_state = IB_QPS_INIT;
        attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
            IB_ACCESS_REMOTE_WRITE;
        attr->port_num = ch->sport->port;
        attr->pkey_index = 0;

        ret = ib_modify_qp(qp, attr,
                           IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PORT |
                           IB_QP_PKEY_INDEX);

        kfree(attr);
        return ret;
}

/**
 * Change the state of a channel to 'ready to receive' (RTR).
 * @ch: channel of the queue pair.
 * @qp: queue pair to change the state of.
 *
 * Returns zero upon success and a negative value upon failure.
 *
 * Note: currently a struct ib_qp_attr takes 136 bytes on a 64-bit system.
 * If this structure ever becomes larger, it might be necessary to allocate
 * it dynamically instead of on the stack.
 */
static int srpt_ch_qp_rtr(struct srpt_rdma_ch *ch, struct ib_qp *qp)
{
        struct ib_qp_attr qp_attr;
        int attr_mask;
        int ret;

        qp_attr.qp_state = IB_QPS_RTR;
        ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask);
        if (ret)
                goto out;

        qp_attr.max_dest_rd_atomic = 4;

        ret = ib_modify_qp(qp, &qp_attr, attr_mask);

out:
        return ret;
}

/**
 * Change the state of a channel to 'ready to send' (RTS).
 * @ch: channel of the queue pair.
 * @qp: queue pair to change the state of.
 *
 * Returns zero upon success and a negative value upon failure.
 *
 * Note: currently a struct ib_qp_attr takes 136 bytes on a 64-bit system.
 * If this structure ever becomes larger, it might be necessary to allocate
 * it dynamically instead of on the stack.
 */
static int srpt_ch_qp_rts(struct srpt_rdma_ch *ch, struct ib_qp *qp)
{
        struct ib_qp_attr qp_attr;
        int attr_mask;
        int ret;

        qp_attr.qp_state = IB_QPS_RTS;
        ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask);
        if (ret)
                goto out;

        qp_attr.max_rd_atomic = 4;

        ret = ib_modify_qp(qp, &qp_attr, attr_mask);

out:
        return ret;
}

static void srpt_reset_ioctx(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
{
        WARN_ON(!ch);
        if (!ch)
                return;

        srpt_unmap_sg_to_ib_sge(ch, ioctx);

        if (ioctx->n_rbuf > 1) {
                kfree(ioctx->rbufs);
                ioctx->rbufs = NULL;
        }

        if (srpt_post_recv(ch->sport->sdev, ioctx))
                PRINT_ERROR("%s", "SRQ post_recv failed - this is serious.");
                /* we should queue it back to free_ioctx queue */
        else
                atomic_inc(&ch->req_lim_delta);
}
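
/*
 * Explanatory note (not in the original source): every receive that is
 * successfully reposted increments ch->req_lim_delta. srpt_build_cmd_rsp()
 * and srpt_build_tskmgmt_rsp() later read and subtract the accumulated delta
 * and store it in the req_lim_delta field of the next SRP_RSP, which is how
 * the initiator learns that it may send more requests.
 */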

/**
 * Abort a command.
 */
static void srpt_abort_scst_cmd(struct srpt_device *sdev,
                                struct scst_cmd *scmnd)
{
        struct srpt_ioctx *ioctx;
        scst_data_direction dir;
        enum srpt_command_state previous_state;

        TRACE_ENTRY();

        ioctx = scst_cmd_get_tgt_priv(scmnd);
        BUG_ON(!ioctx);

        previous_state = srpt_set_cmd_state(ioctx, SRPT_STATE_ABORTED);
        if (previous_state == SRPT_STATE_ABORTED)
                goto out;

        TRACE_DBG("Aborting cmd with state %d and tag %lld",
                  previous_state, scst_cmd_get_tag(scmnd));

        dir = scst_cmd_get_data_direction(scmnd);
        if (dir != SCST_DATA_NONE && scst_cmd_get_sg(scmnd))
                ib_dma_unmap_sg(sdev->device,
                                scst_cmd_get_sg(scmnd),
                                scst_cmd_get_sg_cnt(scmnd),
                                scst_to_tgt_dma_dir(dir));

        switch (previous_state) {
        case SRPT_STATE_NEW:
                break;
        case SRPT_STATE_NEED_DATA:
                WARN_ON(scst_cmd_get_data_direction(ioctx->scmnd)
                        == SCST_DATA_READ);
                scst_rx_data(scmnd,
                             SCST_RX_STATUS_ERROR,
                             SCST_CONTEXT_THREAD);
                break;
        case SRPT_STATE_DATA_IN:
        case SRPT_STATE_PROCESSED:
                scst_set_delivery_status(scmnd, SCST_CMD_DELIVERY_ABORTED);
                WARN_ON(scmnd->state != SCST_CMD_STATE_XMIT_WAIT);
                scst_tgt_cmd_done(scmnd, scst_estimate_context());
                break;
        default:
                TRACE_DBG("Aborting cmd with state %d", previous_state);
                WARN_ON("ERROR: unexpected command state");
        }

out:
        ;

        TRACE_EXIT();
}

static void srpt_handle_err_comp(struct srpt_rdma_ch *ch, struct ib_wc *wc)
{
        struct srpt_ioctx *ioctx;
        struct srpt_device *sdev = ch->sport->sdev;

        if (wc->wr_id & SRPT_OP_RECV) {
                ioctx = sdev->ioctx_ring[wc->wr_id & ~SRPT_OP_RECV];
                PRINT_ERROR("%s", "This is serious - SRQ is in bad state.");
        } else {
                ioctx = sdev->ioctx_ring[wc->wr_id];

                if (ioctx->scmnd)
                        srpt_abort_scst_cmd(sdev, ioctx->scmnd);
                else
                        srpt_reset_ioctx(ch, ioctx);
        }
}

/** Process an IB send completion notification. */
static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
                                  struct srpt_ioctx *ioctx,
                                  enum scst_exec_context context)
{
        if (ioctx->scmnd) {
                scst_data_direction dir =
                        scst_cmd_get_data_direction(ioctx->scmnd);

                if (dir != SCST_DATA_NONE && scst_cmd_get_sg(ioctx->scmnd))
                        ib_dma_unmap_sg(ch->sport->sdev->device,
                                        scst_cmd_get_sg(ioctx->scmnd),
                                        scst_cmd_get_sg_cnt(ioctx->scmnd),
                                        scst_to_tgt_dma_dir(dir));

                WARN_ON(ioctx->scmnd->state != SCST_CMD_STATE_XMIT_WAIT);
                scst_tgt_cmd_done(ioctx->scmnd, context);
        } else
                srpt_reset_ioctx(ch, ioctx);
}

/** Process an IB RDMA completion notification. */
static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
                                  struct srpt_ioctx *ioctx)
{
        if (!ioctx->scmnd) {
                WARN_ON("ERROR: ioctx->scmnd == NULL");
                srpt_reset_ioctx(ch, ioctx);
                return;
        }

        /*
         * If an RDMA completion notification has been received for a write
         * command, tell SCST that processing can continue by calling
         * scst_rx_data().
         */
        if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
                                SRPT_STATE_DATA_IN) == SRPT_STATE_NEED_DATA) {
                WARN_ON(scst_cmd_get_data_direction(ioctx->scmnd)
                        == SCST_DATA_READ);
                scst_rx_data(ioctx->scmnd, SCST_RX_STATUS_SUCCESS,
                             scst_estimate_context());
        }
}

/**
 * Build an SRP_RSP response.
 * @ch: RDMA channel through which the request has been received.
 * @ioctx: I/O context in which the SRP_RSP response will be built.
 * @s_key: sense key that will be stored in the response.
 * @s_code: value that will be stored in the asc_ascq field of the sense data.
 * @tag: tag of the request for which this response is being generated.
 *
 * Returns the size in bytes of the SRP_RSP response.
 *
 * An SRP_RSP response contains a SCSI status or service response. See also
 * section 6.9 in the T10 SRP r16a document for the format of an SRP_RSP
 * response. See also SPC-2 for more information about sense data.
 */
static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
                              struct srpt_ioctx *ioctx, u8 s_key, u8 s_code,
                              u64 tag)
{
        struct srp_rsp *srp_rsp;
        struct sense_data *sense;
        int limit_delta;
        int sense_data_len;
        int resp_len;

        sense_data_len = (s_key == NO_SENSE) ? 0 : sizeof(*sense);
        resp_len = sizeof(*srp_rsp) + sense_data_len;

        srp_rsp = ioctx->buf;
        memset(srp_rsp, 0, sizeof *srp_rsp);

        limit_delta = atomic_read(&ch->req_lim_delta);
        atomic_sub(limit_delta, &ch->req_lim_delta);

        srp_rsp->opcode = SRP_RSP;
        srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
        srp_rsp->tag = tag;

        if (s_key != NO_SENSE) {
                srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
                srp_rsp->status = SAM_STAT_CHECK_CONDITION;
                srp_rsp->sense_data_len = cpu_to_be32(sense_data_len);

                sense = (struct sense_data *)(srp_rsp + 1);
                sense->err_code = 0x70;
                sense->key = s_key;
                sense->asc_ascq = s_code;
        }

        return resp_len;
}
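
/*
 * Usage example taken from srpt_handle_new_iu() below: a CHECK CONDITION
 * response for an unsupported information unit is built with
 *
 *   srpt_build_cmd_rsp(ch, ioctx, ILLEGAL_REQUEST, INVALID_CDB,
 *                      srp_cmd->tag);
 *
 * and the resulting response length is what srpt_post_send() is then called
 * with.
 */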

/**
 * Build a task management response, which is a specific SRP_RSP response.
 * @ch: RDMA channel through which the request has been received.
 * @ioctx: I/O context in which the SRP_RSP response will be built.
 * @rsp_code: RSP_CODE that will be stored in the response.
 * @tag: tag of the request for which this response is being generated.
 *
 * Returns the size in bytes of the SRP_RSP response.
 *
 * An SRP_RSP response contains a SCSI status or service response. See also
 * section 6.9 in the T10 SRP r16a document for the format of an SRP_RSP
 * response.
 */
static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
                                  struct srpt_ioctx *ioctx, u8 rsp_code,
                                  u64 tag)
{
        struct srp_rsp *srp_rsp;
        int limit_delta;
        int resp_data_len;
        int resp_len;

        resp_data_len = (rsp_code == SRP_TSK_MGMT_SUCCESS) ? 0 : 4;
        resp_len = sizeof(*srp_rsp) + resp_data_len;

        srp_rsp = ioctx->buf;
        memset(srp_rsp, 0, sizeof *srp_rsp);

        limit_delta = atomic_read(&ch->req_lim_delta);
        atomic_sub(limit_delta, &ch->req_lim_delta);

        srp_rsp->opcode = SRP_RSP;
        srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
        srp_rsp->tag = tag;

        if (rsp_code != SRP_TSK_MGMT_SUCCESS) {
                srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID;
                srp_rsp->resp_data_len = cpu_to_be32(resp_data_len);
                srp_rsp->data[3] = rsp_code;
        }

        return resp_len;
}

/*
 * Process SRP_CMD.
 */
static int srpt_handle_cmd(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
{
        struct scst_cmd *scmnd;
        struct srp_cmd *srp_cmd;
        struct srp_rsp *srp_rsp;
        scst_data_direction dir;
        int indirect_desc = 0;
        int ret;

        srp_cmd = ioctx->buf;
        srp_rsp = ioctx->buf;

        dir = SCST_DATA_NONE;
        if (srp_cmd->buf_fmt) {
                ret = srpt_get_desc_tbl(ioctx, srp_cmd, &indirect_desc);
                if (ret) {
                        srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
                                           NO_ADD_SENSE, srp_cmd->tag);
                        srp_rsp->status = SAM_STAT_TASK_SET_FULL;
                        goto err;
                }

                if (indirect_desc) {
                        srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
                                           NO_ADD_SENSE, srp_cmd->tag);
                        srp_rsp->status = SAM_STAT_TASK_SET_FULL;
                        goto err;
                }

                /*
                 * The lower four bits of the buffer format field contain the
                 * DATA-IN buffer descriptor format, and the highest four bits
                 * contain the DATA-OUT buffer descriptor format.
                 */
                if (srp_cmd->buf_fmt & 0xf)
                        /* DATA-IN: transfer data from target to initiator. */
                        dir = SCST_DATA_READ;
                else if (srp_cmd->buf_fmt >> 4)
                        /* DATA-OUT: transfer data from initiator to target. */
                        dir = SCST_DATA_WRITE;
        }

        scmnd = scst_rx_cmd(ch->scst_sess, (u8 *) &srp_cmd->lun,
                            sizeof srp_cmd->lun, srp_cmd->cdb, 16,
                            thread ? SCST_NON_ATOMIC : SCST_ATOMIC);
        if (!scmnd) {
                srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
                                   NO_ADD_SENSE, srp_cmd->tag);
                srp_rsp->status = SAM_STAT_TASK_SET_FULL;
                goto err;
        }

        ioctx->scmnd = scmnd;

        switch (srp_cmd->task_attr) {
        case SRP_CMD_HEAD_OF_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_HEAD_OF_QUEUE;
                break;
        case SRP_CMD_ORDERED_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
                break;
        case SRP_CMD_SIMPLE_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_SIMPLE;
                break;
        case SRP_CMD_ACA:
                scmnd->queue_type = SCST_CMD_QUEUE_ACA;
                break;
        default:
                scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
                break;
        }

        scst_cmd_set_tag(scmnd, srp_cmd->tag);
        scst_cmd_set_tgt_priv(scmnd, ioctx);
        scst_cmd_set_expected(scmnd, dir, ioctx->data_len);
        scst_cmd_init_done(scmnd, scst_estimate_context());

        return 0;

err:
        WARN_ON(srp_rsp->opcode != SRP_RSP);

        return -1;
}

/*
 * Process an SRP_TSK_MGMT request.
 *
 * Returns 0 upon success and -1 upon failure.
 *
 * Each task management function is performed by calling one of the
 * scst_rx_mgmt_fn*() functions. These functions will either report failure
 * or process the task management function asynchronously. The function
 * srpt_tsk_mgmt_done() will be called by the SCST core upon completion of the
 * task management function. When srpt_handle_tsk_mgmt() reports failure
 * (i.e. returns -1) a response will have been built in ioctx->buf. This
 * information unit has to be sent back by the caller.
 *
 * For more information about SRP_TSK_MGMT information units, see also section
 * 6.7 in the T10 SRP r16a document.
 */
static int srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
                                struct srpt_ioctx *ioctx)
{
        struct srp_tsk_mgmt *srp_tsk;
        struct srpt_mgmt_ioctx *mgmt_ioctx;
        int ret;

        ib_dma_sync_single_for_cpu(ch->sport->sdev->device, ioctx->dma,
                                   sizeof(struct srp_tsk_mgmt),
                                   DMA_FROM_DEVICE);

        srp_tsk = ioctx->buf;

        TRACE_DBG("recv_tsk_mgmt= %d for task_tag= %lld"
                  " using tag= %lld cm_id= %p sess= %p",
                  srp_tsk->tsk_mgmt_func,
                  (unsigned long long) srp_tsk->task_tag,
                  (unsigned long long) srp_tsk->tag,
                  ch->cm_id, ch->scst_sess);

        mgmt_ioctx = kmalloc(sizeof *mgmt_ioctx, GFP_ATOMIC);
        if (!mgmt_ioctx) {
                srpt_build_tskmgmt_rsp(ch, ioctx, SRP_TSK_MGMT_FAILED,
                                       srp_tsk->tag);
                goto err;
        }

        mgmt_ioctx->ioctx = ioctx;
        mgmt_ioctx->ch = ch;
        mgmt_ioctx->tag = srp_tsk->tag;

        switch (srp_tsk->tsk_mgmt_func) {
        case SRP_TSK_ABORT_TASK:
                TRACE_DBG("%s", "Processing SRP_TSK_ABORT_TASK");
                ret = scst_rx_mgmt_fn_tag(ch->scst_sess,
                                          SCST_ABORT_TASK,
                                          srp_tsk->task_tag,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_ABORT_TASK_SET:
                TRACE_DBG("%s", "Processing SRP_TSK_ABORT_TASK_SET");
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_ABORT_TASK_SET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_CLEAR_TASK_SET:
                TRACE_DBG("%s", "Processing SRP_TSK_CLEAR_TASK_SET");
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_CLEAR_TASK_SET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_LUN_RESET:
                TRACE_DBG("%s", "Processing SRP_TSK_LUN_RESET");
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_LUN_RESET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_CLEAR_ACA:
                TRACE_DBG("%s", "Processing SRP_TSK_CLEAR_ACA");
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_CLEAR_ACA,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        default:
                TRACE_DBG("%s", "Unsupported task management function.");
                srpt_build_tskmgmt_rsp(ch, ioctx,
                                       SRP_TSK_MGMT_FUNC_NOT_SUPP,
                                       srp_tsk->tag);
                goto err;
        }

        if (ret) {
                TRACE_DBG("%s", "Processing task management function failed.");
                srpt_build_tskmgmt_rsp(ch, ioctx, SRP_TSK_MGMT_FAILED,
                                       srp_tsk->tag);
                goto err;
        }

        WARN_ON(srp_tsk->opcode == SRP_RSP);

        return 0;

err:
        WARN_ON(srp_tsk->opcode != SRP_RSP);

        kfree(mgmt_ioctx);
        return -1;
}

/**
 * Process a newly received information unit.
 * @ch: RDMA channel through which the information unit has been received.
 * @ioctx: SRPT I/O context associated with the information unit.
 */
static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
                               struct srpt_ioctx *ioctx)
{
        struct srp_cmd *srp_cmd;
        struct srp_rsp *srp_rsp;
        enum rdma_ch_state ch_state;
        int len;

        ch_state = atomic_read(&ch->state);
        if (ch_state == RDMA_CHANNEL_CONNECTING) {
                list_add_tail(&ioctx->wait_list, &ch->cmd_wait_list);
                return;
        } else if (ch_state == RDMA_CHANNEL_DISCONNECTING) {
                srpt_reset_ioctx(ch, ioctx);
                return;
        }

        WARN_ON(ch_state != RDMA_CHANNEL_LIVE);

        ib_dma_sync_single_for_cpu(ch->sport->sdev->device, ioctx->dma,
                                   sizeof(struct srp_cmd), DMA_FROM_DEVICE);

        ioctx->data_len = 0;
        ioctx->n_rbuf = 0;
        ioctx->rbufs = NULL;
        ioctx->n_rdma = 0;
        ioctx->n_rdma_ius = 0;
        ioctx->rdma_ius = NULL;
        ioctx->scmnd = NULL;
        ioctx->ch = ch;
        atomic_set(&ioctx->state, SRPT_STATE_NEW);

        srp_cmd = ioctx->buf;
        srp_rsp = ioctx->buf;

        switch (srp_cmd->opcode) {
        case SRP_CMD:
                if (srpt_handle_cmd(ch, ioctx) < 0)
                        goto err;
                break;

        case SRP_TSK_MGMT:
                if (srpt_handle_tsk_mgmt(ch, ioctx) < 0)
                        goto err;
                break;

        case SRP_I_LOGOUT:
        case SRP_AER_REQ:
        default:
                srpt_build_cmd_rsp(ch, ioctx, ILLEGAL_REQUEST, INVALID_CDB,
                                   srp_cmd->tag);
                goto err;
        }

        return;

err:
        WARN_ON(srp_rsp->opcode != SRP_RSP);
        len = (sizeof *srp_rsp) + be32_to_cpu(srp_rsp->sense_data_len);

        ch_state = atomic_read(&ch->state);
        if (ch_state != RDMA_CHANNEL_LIVE) {
                /* Give up if another thread modified the channel state. */
                PRINT_ERROR("%s: channel is in state %d", __func__, ch_state);
                srpt_reset_ioctx(ch, ioctx);
        } else if (srpt_post_send(ch, ioctx, len)) {
                PRINT_ERROR("%s: sending SRP_RSP response failed", __func__);
                srpt_reset_ioctx(ch, ioctx);
        }
}

/*
 * Returns true if the ioctx list is non-empty or if the ib_srpt kernel thread
 * should stop.
 * @pre thread != 0
 */
static inline int srpt_test_ioctx_list(void)
{
        int res = (!list_empty(&srpt_thread.thread_ioctx_list) ||
                   unlikely(kthread_should_stop()));
        return res;
}

/*
 * Add 'ioctx' to the tail of the ioctx list and wake up the kernel thread.
 *
 * @pre thread != 0
 */
static inline void srpt_schedule_thread(struct srpt_ioctx *ioctx)
{
        unsigned long flags;

        spin_lock_irqsave(&srpt_thread.thread_lock, flags);
        list_add_tail(&ioctx->comp_list, &srpt_thread.thread_ioctx_list);
        spin_unlock_irqrestore(&srpt_thread.thread_lock, flags);
        wake_up(&ioctx_list_waitQ);
}
1497
1498 /**
1499  * InfiniBand completion queue callback function.
1500  * @cq: completion queue.
1501  * @ctx: completion queue context, which was passed as the fourth argument of
1502  *       the function ib_create_cq().
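      *
      * The wr_id of each work completion encodes the index of the associated
      * ioctx in sdev->ioctx_ring and, via the SRPT_OP_RECV bit, whether it
      * completes a receive or a send/RDMA work request. When the 'thread'
      * module parameter is non-zero, completion processing is handed off to
      * the ib_srpt kernel thread instead of running in interrupt context.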
1503  */
1504 static void srpt_completion(struct ib_cq *cq, void *ctx)
1505 {
1506         struct srpt_rdma_ch *ch = ctx;
1507         struct srpt_device *sdev = ch->sport->sdev;
1508         struct ib_wc wc;
1509         struct srpt_ioctx *ioctx;
1510
1511         ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
1512         while (ib_poll_cq(ch->cq, 1, &wc) > 0) {
1513                 if (wc.status) {
1514                         PRINT_ERROR("failed %s status= %d",
1515                                wc.wr_id & SRPT_OP_RECV ? "receive" : "send",
1516                                wc.status);
1517                         srpt_handle_err_comp(ch, &wc);
1518                         break;
1519                 }
1520
1521                 if (wc.wr_id & SRPT_OP_RECV) {
1522                         ioctx = sdev->ioctx_ring[wc.wr_id & ~SRPT_OP_RECV];
1523                         if (thread) {
1524                                 ioctx->ch = ch;
1525                                 ioctx->op = IB_WC_RECV;
1526                                 srpt_schedule_thread(ioctx);
1527                         } else
1528                                 srpt_handle_new_iu(ch, ioctx);
1529                         continue;
1530                 } else {
1531                         ioctx = sdev->ioctx_ring[wc.wr_id];
1532                         if (wc.opcode == IB_WC_SEND)
1533                                 atomic_inc(&ch->qp_wr_avail);
1534                         else {
1535                                 WARN_ON(wc.opcode != IB_WC_RDMA_READ);
1536                                 WARN_ON(ioctx->n_rdma <= 0);
1537                                 atomic_add(ioctx->n_rdma,
1538                                            &ch->qp_wr_avail);
1539                         }
1540                 }
1541
1542                 if (thread) {
1543                         ioctx->ch = ch;
1544                         ioctx->op = wc.opcode;
1545                         srpt_schedule_thread(ioctx);
1546                 } else {
1547                         switch (wc.opcode) {
1548                         case IB_WC_SEND:
1549                                 srpt_handle_send_comp(ch, ioctx,
1550                                         scst_estimate_context());
1551                                 break;
1552                         case IB_WC_RDMA_WRITE:
1553                         case IB_WC_RDMA_READ:
1554                                 srpt_handle_rdma_comp(ch, ioctx);
1555                                 break;
1556                         default:
1557                                 break;
1558                         }
1559                 }
1560
1561 #if defined(CONFIG_SCST_DEBUG)
1562                 if (interrupt_processing_delay_in_us <= MAX_UDELAY_MS * 1000)
1563                         udelay(interrupt_processing_delay_in_us);
1564 #endif
1565         }
1566 }
1567
1568 /*
1569  * Create a completion queue and a queue pair for the specified RDMA channel.
1570  */
1571 static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
1572 {
1573         struct ib_qp_init_attr *qp_init;
1574         struct srpt_device *sdev = ch->sport->sdev;
1575         int cqe;
1576         int ret;
1577
1578         qp_init = kzalloc(sizeof *qp_init, GFP_KERNEL);
1579         if (!qp_init)
1580                 return -ENOMEM;
1581
1582         /* Create a completion queue (CQ). */
1583
1584         cqe = SRPT_RQ_SIZE + SRPT_SQ_SIZE - 1;
1585 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(RHEL_RELEASE_CODE)
1586         ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe);
1587 #else
1588         ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe, 0);
1589 #endif
1590         if (IS_ERR(ch->cq)) {
1591                 ret = PTR_ERR(ch->cq);
1592                 PRINT_ERROR("failed to create_cq cqe= %d ret= %d", cqe, ret);
1593                 goto out;
1594         }
1595
1596         /* Request completion notification. */
1597
1598         ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
1599
1600         /* Create a queue pair (QP). */
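             /* Note: all channels of an HCA share the device-wide SRQ (sdev->srq). */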
1601
1602         qp_init->qp_context = (void *)ch;
1603         qp_init->event_handler
1604                 = (void(*)(struct ib_event *, void*))srpt_qp_event;
1605         qp_init->send_cq = ch->cq;
1606         qp_init->recv_cq = ch->cq;
1607         qp_init->srq = sdev->srq;
1608         qp_init->sq_sig_type = IB_SIGNAL_REQ_WR;
1609         qp_init->qp_type = IB_QPT_RC;
1610         qp_init->cap.max_send_wr = SRPT_SQ_SIZE;
1611         qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE;
1612
1613         ch->qp = ib_create_qp(sdev->pd, qp_init);
1614         if (IS_ERR(ch->qp)) {
1615                 ret = PTR_ERR(ch->qp);
1616                 ib_destroy_cq(ch->cq);
1617                 PRINT_ERROR("failed to create_qp ret= %d", ret);
1618                 goto out;
1619         }
1620
1621         atomic_set(&ch->qp_wr_avail, qp_init->cap.max_send_wr);
1622
1623         TRACE_DBG("%s: max_cqe= %d max_sge= %d cm_id= %p",
1624                __func__, ch->cq->cqe, qp_init->cap.max_send_sge,
1625                ch->cm_id);
1626
1627         /* Modify the attributes and the state of queue pair ch->qp. */
1628
1629         ret = srpt_init_ch_qp(ch, ch->qp);
1630         if (ret) {
1631                 ib_destroy_qp(ch->qp);
1632                 ib_destroy_cq(ch->cq);
1633                 goto out;
1634         }
1635
1636         atomic_set(&ch->req_lim_delta, SRPT_RQ_SIZE);
1637 out:
1638         kfree(qp_init);
1639         return ret;
1640 }
1641
1642 /**
1643  * Look up the RDMA channel that corresponds to the specified cm_id.
1644  *
1645  * Return NULL if no matching RDMA channel has been found.
1646  *
1647  * Notes:
1648  * - Must be called from inside srpt_cm_handler to avoid a race between
1649  *   accessing sdev->spinlock and the call to kfree(sdev) in srpt_remove_one()
1650  *   (the caller of srpt_cm_handler holds the cm_id spinlock;
1651  *   srpt_remove_one() waits until all SCST sessions for the associated
1652  *   IB device have been unregistered and SCST session registration involves
1653  *   a call to ib_destroy_cm_id(), which locks the cm_id spinlock and hence
1654  *   waits until this function has finished).
1655  * - When release_ch == true the return value may be compared with NULL
1656  *   but must not be dereferenced because in this case the return value
1657  *   is a dangling pointer.
1658  */
1659 static struct srpt_rdma_ch *srpt_find_channel(struct ib_cm_id *cm_id,
1660                                               bool release_ch)
1661 {
1662         struct srpt_device *sdev;
1663         struct srpt_rdma_ch *ch;
1664
1665         sdev = cm_id->context;
1666         BUG_ON(!sdev);
1667         ch = NULL;
1668         spin_lock_irq(&sdev->spinlock);
1669         list_for_each_entry(ch, &sdev->rch_list, list) {
1670                 if (ch->cm_id == cm_id) {
1671                         if (release_ch) {
1672                                 list_del(&ch->list);
1673                                 atomic_set(&ch->state,
1674                                            RDMA_CHANNEL_DISCONNECTING);
1675                                 scst_unregister_session(ch->scst_sess, 0,
1676                                                         srpt_release_channel);
1677                         }
1678                         break;
1679                 }
1680         }
1681
1682         spin_unlock_irq(&sdev->spinlock);
1683
1684         return ch;
1685 }
1686
1687 /**
1688  * Release all resources associated with an RDMA channel.
1689  *
1690  * Notes:
1691  * - The caller must have removed the channel from the channel list before
1692  *   calling this function.
1693  * - Must be called as a callback function via scst_unregister_session(). Never
1694  *   call this function directly because doing so would trigger several race
1695  *   conditions.
1696  */
1697 static void srpt_release_channel(struct scst_session *scst_sess)
1698 {
1699         struct srpt_rdma_ch *ch;
1700
1701         TRACE_ENTRY();
1702
1703         ch = scst_sess_get_tgt_priv(scst_sess);
1704         BUG_ON(!ch);
1705         WARN_ON(srpt_find_channel(ch->cm_id, false) == ch);
1706
1707         WARN_ON(atomic_read(&ch->state) != RDMA_CHANNEL_DISCONNECTING);
1708
1709         TRACE_DBG("destroying cm_id %p", ch->cm_id);
1710         BUG_ON(!ch->cm_id);
1711         ib_destroy_cm_id(ch->cm_id);
1712
1713         ib_destroy_qp(ch->qp);
1714         ib_destroy_cq(ch->cq);
1715         kfree(ch);
1716
1717         TRACE_EXIT();
1718 }
1719
1720 /**
1721  * Process the event IB_CM_REQ_RECEIVED.
1722  *
1723  * Ownership of the cm_id is transferred to the SCST session if this function
1724  * returns zero. Otherwise the caller remains the owner of the cm_id.
1725  */
1726 static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
1727                             struct ib_cm_req_event_param *param,
1728                             void *private_data)
1729 {
1730         struct srpt_device *sdev = cm_id->context;
1731         struct srp_login_req *req;
1732         struct srp_login_rsp *rsp;
1733         struct srp_login_rej *rej;
1734         struct ib_cm_rep_param *rep_param;
1735         struct srpt_rdma_ch *ch, *tmp_ch;
1736         u32 it_iu_len;
1737         int ret = 0;
1738
1739 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
1740         WARN_ON(!sdev || !private_data);
1741         if (!sdev || !private_data)
1742                 return -EINVAL;
1743 #else
1744         if (WARN_ON(!sdev || !private_data))
1745                 return -EINVAL;
1746 #endif
1747
1748         req = (struct srp_login_req *)private_data;
1749
1750         it_iu_len = be32_to_cpu(req->req_it_iu_len);
1751
1752         PRINT_INFO("Received SRP_LOGIN_REQ with"
1753             " i_port_id 0x%llx:0x%llx, t_port_id 0x%llx:0x%llx and length %d"
1754             " on port %d (guid=0x%llx:0x%llx)",
1755             (unsigned long long)be64_to_cpu(*(u64 *)&req->initiator_port_id[0]),
1756             (unsigned long long)be64_to_cpu(*(u64 *)&req->initiator_port_id[8]),
1757             (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[0]),
1758             (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[8]),
1759             it_iu_len,
1760             param->port,
1761             (unsigned long long)be64_to_cpu(*(u64 *)
1762                                 &sdev->port[param->port - 1].gid.raw[0]),
1763             (unsigned long long)be64_to_cpu(*(u64 *)
1764                                 &sdev->port[param->port - 1].gid.raw[8]));
1765
1766         rsp = kzalloc(sizeof *rsp, GFP_KERNEL);
1767         rej = kzalloc(sizeof *rej, GFP_KERNEL);
1768         rep_param = kzalloc(sizeof *rep_param, GFP_KERNEL);
1769
1770         if (!rsp || !rej || !rep_param) {
1771                 ret = -ENOMEM;
1772                 goto out;
1773         }
1774
1775         if (it_iu_len > srp_max_message_size || it_iu_len < 64) {
1776                 rej->reason =
1777                     cpu_to_be32(SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
1778                 ret = -EINVAL;
1779                 PRINT_ERROR("rejected SRP_LOGIN_REQ because its"
1780                             " length (%d bytes) is out of range (%d .. %d)",
1781                             it_iu_len, 64, srp_max_message_size);
1782                 goto reject;
1783         }
1784
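             /*
              * SRP_MULTICHAN_SINGLE means that the initiator wants any
              * existing channel with the same initiator and target port IDs
              * to be terminated before the new channel is established.
              */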
1785         if ((req->req_flags & 0x3) == SRP_MULTICHAN_SINGLE) {
1786                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;
1787
1788                 spin_lock_irq(&sdev->spinlock);
1789
1790                 list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
1791                         if (!memcmp(ch->i_port_id, req->initiator_port_id, 16)
1792                             && !memcmp(ch->t_port_id, req->target_port_id, 16)
1793                             && param->port == ch->sport->port
1794                             && param->listen_id == ch->sport->sdev->cm_id
1795                             && ch->cm_id) {
1796                                 enum rdma_ch_state prev_state;
1797
1798                                 /* found an existing channel */
1799                                 TRACE_DBG("Found existing channel name= %s"
1800                                           " cm_id= %p state= %d",
1801                                           ch->sess_name, ch->cm_id,
1802                                           atomic_read(&ch->state));
1803
1804                                 prev_state = atomic_xchg(&ch->state,
1805                                                 RDMA_CHANNEL_DISCONNECTING);
1806                                 if (prev_state == RDMA_CHANNEL_CONNECTING)
1807                                         list_del(&ch->list);
1808
1809                                 spin_unlock_irq(&sdev->spinlock);
1810
1811                                 rsp->rsp_flags =
1812                                         SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
1813
1814                                 if (prev_state == RDMA_CHANNEL_LIVE) {
1815                                         ib_send_cm_dreq(ch->cm_id, NULL, 0);
1816                                         PRINT_INFO("disconnected"
1817                                           " session %s because a new"
1818                                           " SRP_LOGIN_REQ has been received.",
1819                                           ch->sess_name);
1820                                 } else if (prev_state ==
1821                                          RDMA_CHANNEL_CONNECTING) {
1822                                         PRINT_ERROR("%s", "rejected"
1823                                           " SRP_LOGIN_REQ because another login"
1824                                           " request is being processed.");
1825                                         ib_send_cm_rej(ch->cm_id,
1826                                                        IB_CM_REJ_NO_RESOURCES,
1827                                                        NULL, 0, NULL, 0);
1828                                         scst_unregister_session(ch->scst_sess,
1829                                                         0,
1830                                                         srpt_release_channel);
1831                                 }
1832
1833                                 spin_lock_irq(&sdev->spinlock);
1834                         }
1835                 }
1836
1837                 spin_unlock_irq(&sdev->spinlock);
1838
1839         } else
1840                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
1841
1842         if (((u64) (*(u64 *) req->target_port_id) !=
1843              cpu_to_be64(srpt_service_guid)) ||
1844             ((u64) (*(u64 *) (req->target_port_id + 8)) !=
1845              cpu_to_be64(srpt_service_guid))) {
1846                 rej->reason =
1847                     cpu_to_be32(SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
1848                 ret = -ENOMEM;
1849                 PRINT_ERROR("%s", "rejected SRP_LOGIN_REQ because it"
1850                        " has an invalid target port identifier.");
1851                 goto reject;
1852         }
1853
1854         ch = kzalloc(sizeof *ch, GFP_KERNEL);
1855         if (!ch) {
1856                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1857                 PRINT_ERROR("%s",
1858                             "rejected SRP_LOGIN_REQ because out of memory.");
1859                 ret = -ENOMEM;
1860                 goto reject;
1861         }
1862
1863         spin_lock_init(&ch->spinlock);
1864         memcpy(ch->i_port_id, req->initiator_port_id, 16);
1865         memcpy(ch->t_port_id, req->target_port_id, 16);
1866         ch->sport = &sdev->port[param->port - 1];
1867         ch->cm_id = cm_id;
1868         atomic_set(&ch->state, RDMA_CHANNEL_CONNECTING);
1869         INIT_LIST_HEAD(&ch->cmd_wait_list);
1870
1871         ret = srpt_create_ch_ib(ch);
1872         if (ret) {
1873                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1874                 PRINT_ERROR("%s", "rejected SRP_LOGIN_REQ because creating"
1875                             " a new RDMA channel failed.");
1876                 goto free_ch;
1877         }
1878
1879         ret = srpt_ch_qp_rtr(ch, ch->qp);
1880         if (ret) {
1881                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1882                 PRINT_ERROR("rejected SRP_LOGIN_REQ because enabling"
1883                        " RTR failed (error code = %d)", ret);
1884                 goto destroy_ib;
1885         }
1886
1887         if (use_port_guid_in_session_name) {
1888                 /*
1889                  * If the kernel module parameter use_port_guid_in_session_name
1890                  * has been specified, use a combination of the target port
1891                  * GUID and the initiator port ID as the session name. This
1892                  * was the original behavior of the SRP target implementation
1893                  * (i.e. before the SRPT was included in OFED 1.3).
1894                  */
1895                 snprintf(ch->sess_name, sizeof(ch->sess_name),
1896                          "0x%016llx%016llx",
1897                          (unsigned long long)be64_to_cpu(*(u64 *)
1898                                 &sdev->port[param->port - 1].gid.raw[8]),
1899                          (unsigned long long)be64_to_cpu(*(u64 *)
1900                                 (ch->i_port_id + 8)));
1901         } else {
1902                 /*
1903                  * Default behavior: use the initiator port identifier as the
1904                  * session name.
1905                  */
1906                 snprintf(ch->sess_name, sizeof(ch->sess_name),
1907                          "0x%016llx%016llx",
1908                          (unsigned long long)be64_to_cpu(*(u64 *)ch->i_port_id),
1909                          (unsigned long long)be64_to_cpu(*(u64 *)
1910                                  (ch->i_port_id + 8)));
1911         }
1912
1913         TRACE_DBG("registering session %s", ch->sess_name);
1914
1915         BUG_ON(!sdev->scst_tgt);
1916         ch->scst_sess = scst_register_session(sdev->scst_tgt, 0, ch->sess_name,
1917                                               NULL, NULL);
1918         if (!ch->scst_sess) {
1919                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1920                 TRACE_DBG("%s", "Failed to create scst sess");
1921                 goto destroy_ib;
1922         }
1923
1924         TRACE_DBG("Establish connection sess=%p name=%s cm_id=%p",
1925                   ch->scst_sess, ch->sess_name, ch->cm_id);
1926
1927         scst_sess_set_tgt_priv(ch->scst_sess, ch);
1928
1929         /* create srp_login_response */
1930         rsp->opcode = SRP_LOGIN_RSP;
1931         rsp->tag = req->tag;
1932         rsp->max_it_iu_len = req->req_it_iu_len;
1933         rsp->max_ti_iu_len = req->req_it_iu_len;
1934         rsp->buf_fmt =
1935             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
1936         rsp->req_lim_delta = cpu_to_be32(SRPT_RQ_SIZE);
1937         atomic_set(&ch->req_lim_delta, 0);
1938
1939         /* create cm reply */
1940         rep_param->qp_num = ch->qp->qp_num;
1941         rep_param->private_data = (void *)rsp;
1942         rep_param->private_data_len = sizeof *rsp;
1943         rep_param->rnr_retry_count = 7;
1944         rep_param->flow_control = 1;
1945         rep_param->failover_accepted = 0;
1946         rep_param->srq = 1;
1947         rep_param->responder_resources = 4;
1948         rep_param->initiator_depth = 4;
1949
1950         ret = ib_send_cm_rep(cm_id, rep_param);
1951         if (ret) {
1952                 PRINT_ERROR("sending SRP_LOGIN_REQ response failed"
1953                             " (error code = %d)", ret);
1954                 goto release_channel;
1955         }
1956
1957         spin_lock_irq(&sdev->spinlock);
1958         list_add_tail(&ch->list, &sdev->rch_list);
1959         spin_unlock_irq(&sdev->spinlock);
1960
1961         goto out;
1962
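     /*
      * Error unwinding: the labels below fall through from the most recently
      * to the least recently allocated resource and end with sending a login
      * rejection and performing the common cleanup.
      */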
1963 release_channel:
1964         atomic_set(&ch->state, RDMA_CHANNEL_DISCONNECTING);
1965         scst_unregister_session(ch->scst_sess, 0, NULL);
1966         ch->scst_sess = NULL;
1967
1968 destroy_ib:
1969         ib_destroy_qp(ch->qp);
1970         ib_destroy_cq(ch->cq);
1971
1972 free_ch:
1973         kfree(ch);
1974
1975 reject:
1976         rej->opcode = SRP_LOGIN_REJ;
1977         rej->tag = req->tag;
1978         rej->buf_fmt =
1979             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
1980
1981         ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
1982                              (void *)rej, sizeof *rej);
1983
1984 out:
1985         kfree(rep_param);
1986         kfree(rsp);
1987         kfree(rej);
1988
1989         return ret;
1990 }
1991
1992 static void srpt_cm_rej_recv(struct ib_cm_id *cm_id)
1993 {
1994         PRINT_INFO("Received InfiniBand REJ packet for cm_id %p.", cm_id);
1995         srpt_find_channel(cm_id, true);
1996 }
1997
1998 /**
1999  * Process an IB_CM_RTU_RECEIVED or IB_CM_USER_ESTABLISHED event.
2000  *
2001  * An IB_CM_RTU_RECEIVED message indicates that the connection is established
2002  * and that the recipient may begin transmitting (RTU = ready to use).
2003  */
2004 static void srpt_cm_rtu_recv(struct ib_cm_id *cm_id)
2005 {
2006         struct srpt_rdma_ch *ch;
2007         int ret;
2008
2009         ch = srpt_find_channel(cm_id, false);
2010         WARN_ON(!ch);
2011         if (!ch)
2012                 goto out;
2013
2014         if (srpt_test_and_set_channel_state(ch, RDMA_CHANNEL_CONNECTING,
2015                         RDMA_CHANNEL_LIVE) == RDMA_CHANNEL_CONNECTING) {
2016                 struct srpt_ioctx *ioctx, *ioctx_tmp;
2017
2018                 ret = srpt_ch_qp_rts(ch, ch->qp);
2019
2020                 list_for_each_entry_safe(ioctx, ioctx_tmp, &ch->cmd_wait_list,
2021                                          wait_list) {
2022                         list_del(&ioctx->wait_list);
2023                         srpt_handle_new_iu(ch, ioctx);
2024                 }
2025                 if (ret && srpt_test_and_set_channel_state(ch,
2026                         RDMA_CHANNEL_LIVE,
2027                         RDMA_CHANNEL_DISCONNECTING) == RDMA_CHANNEL_LIVE) {
2028                         TRACE_DBG("cm_id=%p sess_name=%s state=%d",
2029                                   cm_id, ch->sess_name,
2030                                   atomic_read(&ch->state));
2031                         ib_send_cm_dreq(ch->cm_id, NULL, 0);
2032                 }
2033         }
2034
2035 out:
2036         ;
2037 }
2038
2039 static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
2040 {
2041         PRINT_INFO("Received InfiniBand TimeWait exit for cm_id %p.", cm_id);
2042         srpt_find_channel(cm_id, true);
2043 }
2044
2045 static void srpt_cm_rep_error(struct ib_cm_id *cm_id)
2046 {
2047         PRINT_INFO("Received InfiniBand REP error for cm_id %p.", cm_id);
2048         srpt_find_channel(cm_id, true);
2049 }
2050
2051 static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
2052 {
2053         struct srpt_rdma_ch *ch;
2054
2055         ch = srpt_find_channel(cm_id, false);
2056         WARN_ON(!ch);
2057         if (!ch)
2058                 goto out;
2059
2060         TRACE_DBG("cm_id= %p ch->state= %d", cm_id, atomic_read(&ch->state));
2061
2062         switch (atomic_read(&ch->state)) {
2063         case RDMA_CHANNEL_LIVE:
2064         case RDMA_CHANNEL_CONNECTING:
2065                 ib_send_cm_drep(ch->cm_id, NULL, 0);
2066                 PRINT_INFO("Received DREQ and sent DREP for session %s.",
2067                            ch->sess_name);
2068                 break;
2069         case RDMA_CHANNEL_DISCONNECTING:
2070         default:
2071                 break;
2072         }
2073
2074 out:
2075         ;
2076 }
2077
2078 static void srpt_cm_drep_recv(struct ib_cm_id *cm_id)
2079 {
2080         PRINT_INFO("Received InfiniBand DREP message for cm_id %p.", cm_id);
2081         srpt_find_channel(cm_id, true);
2082 }
2083
2084 /**
2085  * IB connection manager callback function.
2086  *
2087  * A non-zero return value will cause the caller to destroy the CM ID.
2088  *
2089  * Note: srpt_cm_handler() must only return a non-zero value when
2090  * srpt_cm_req_recv() failed to transfer ownership of the cm_id to a channel.
2091  * Returning a non-zero value in any other case will trigger a race with the
2092  * ib_destroy_cm_id() call in srpt_release_channel().
2093  */
2094 static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2095 {
2096         int ret;
2097
2098         ret = 0;
2099         switch (event->event) {
2100         case IB_CM_REQ_RECEIVED:
2101                 ret = srpt_cm_req_recv(cm_id, &event->param.req_rcvd,
2102                                        event->private_data);
2103                 break;
2104         case IB_CM_REJ_RECEIVED:
2105                 srpt_cm_rej_recv(cm_id);
2106                 break;
2107         case IB_CM_RTU_RECEIVED:
2108         case IB_CM_USER_ESTABLISHED:
2109                 srpt_cm_rtu_recv(cm_id);
2110                 break;
2111         case IB_CM_DREQ_RECEIVED:
2112                 srpt_cm_dreq_recv(cm_id);
2113                 break;
2114         case IB_CM_DREP_RECEIVED:
2115                 srpt_cm_drep_recv(cm_id);
2116                 break;
2117         case IB_CM_TIMEWAIT_EXIT:
2118                 srpt_cm_timewait_exit(cm_id);
2119                 break;
2120         case IB_CM_REP_ERROR:
2121                 srpt_cm_rep_error(cm_id);
2122                 break;
2123         default:
2124                 break;
2125         }
2126
2127         return ret;
2128 }
2129
2130 static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
2131                                  struct srpt_ioctx *ioctx,
2132                                  struct scst_cmd *scmnd)
2133 {
2134         struct scatterlist *scat;
2135         scst_data_direction dir;
2136         struct rdma_iu *riu;
2137         struct srp_direct_buf *db;
2138         dma_addr_t dma_addr;
2139         struct ib_sge *sge;
2140         u64 raddr;
2141         u32 rsize;
2142         u32 tsize;
2143         u32 dma_len;
2144         int count, nrdma;
2145         int i, j, k;
2146
2147         scat = scst_cmd_get_sg(scmnd);
2148         dir = scst_cmd_get_data_direction(scmnd);
2149         WARN_ON(scat == NULL);
2150         count = ib_dma_map_sg(ch->sport->sdev->device, scat,
2151                               scst_cmd_get_sg_cnt(scmnd),
2152                               scst_to_tgt_dma_dir(dir));
2153         if (unlikely(!count))
2154                 return -EBUSY;
2155
2156         if (ioctx->rdma_ius && ioctx->n_rdma_ius)
2157                 nrdma = ioctx->n_rdma_ius;
2158         else {
2159                 nrdma = count / SRPT_DEF_SG_PER_WQE + ioctx->n_rbuf;
2160
2161                 ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu,
2162                                           scst_cmd_atomic(scmnd)
2163                                           ? GFP_ATOMIC : GFP_KERNEL);
2164                 if (!ioctx->rdma_ius) {
2165                         WARN_ON(scat == NULL);
2166                         ib_dma_unmap_sg(ch->sport->sdev->device,
2167                                         scat, scst_cmd_get_sg_cnt(scmnd),
2168                                         scst_to_tgt_dma_dir(dir));
2169                         return -ENOMEM;
2170                 }
2171
2172                 ioctx->n_rdma_ius = nrdma;
2173         }
2174
2175         db = ioctx->rbufs;
2176         tsize = (dir == SCST_DATA_READ) ?
2177                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
2178         dma_len = sg_dma_len(&scat[0]);
2179         riu = ioctx->rdma_ius;
2180
2181         /*
2182          * First pass: for each remote descriptor, count the number of
2183          * ib_sge entries required. If at most SRPT_DEF_SG_PER_WQE ib_sge
2184          * entries are needed per RDMA operation, a single rdma_iu (one
2185          * RDMA work request) per remote descriptor suffices; otherwise
2186          * extra rdma_iu entries are allocated to carry the additional
2187          * ib_sge entries in further RDMA work requests.
2188          */
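             /*
              * Illustrative example (hypothetical sizes): if SRPT_DEF_SG_PER_WQE
              * were 16, a remote descriptor spanning 20 scatterlist entries
              * would need two rdma_iu entries: one carrying 16 SGEs and a
              * second carrying the remaining four.
              */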
2189         for (i = 0, j = 0;
2190              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
2191                 rsize = be32_to_cpu(db->len);
2192                 raddr = be64_to_cpu(db->va);
2193                 riu->raddr = raddr;
2194                 riu->rkey = be32_to_cpu(db->key);
2195                 riu->sge_cnt = 0;
2196
2197                 /* Calculate how many SGEs this remote buffer requires. */
2198                 while (rsize > 0 && tsize > 0) {
2199
2200                         if (rsize >= dma_len) {
2201                                 tsize -= dma_len;
2202                                 rsize -= dma_len;
2203                                 raddr += dma_len;
2204
2205                                 if (tsize > 0) {
2206                                         ++j;
2207                                         if (j < count)
2208                                                 dma_len = sg_dma_len(&scat[j]);
2209                                 }
2210                         } else {
2211                                 tsize -= rsize;
2212                                 dma_len -= rsize;
2213                                 rsize = 0;
2214                         }
2215
2216                         ++riu->sge_cnt;
2217
2218                         if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) {
2219                                 ++ioctx->n_rdma;
2220                                 riu->sge =
2221                                     kmalloc(riu->sge_cnt * sizeof *riu->sge,
2222                                             scst_cmd_atomic(scmnd)
2223                                             ? GFP_ATOMIC : GFP_KERNEL);
2224                                 if (!riu->sge)
2225                                         goto free_mem;
2226
2227                                 ++riu;
2228                                 riu->sge_cnt = 0;
2229                                 riu->raddr = raddr;
2230                                 riu->rkey = be32_to_cpu(db->key);
2231                         }
2232                 }
2233
2234                 ++ioctx->n_rdma;
2235                 riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge,
2236                                    scst_cmd_atomic(scmnd)
2237                                    ? GFP_ATOMIC : GFP_KERNEL);
2238                 if (!riu->sge)
2239                         goto free_mem;
2240         }
2241
2242         db = ioctx->rbufs;
2243         scat = scst_cmd_get_sg(scmnd);
2244         tsize = (dir == SCST_DATA_READ) ?
2245                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
2246         riu = ioctx->rdma_ius;
2247         dma_len = sg_dma_len(&scat[0]);
2248         dma_addr = sg_dma_address(&scat[0]);
2249
2250         /* Second pass: map scatterlist DMA addresses onto the rdma_iu SGEs. */
2251         for (i = 0, j = 0;
2252              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
2253                 rsize = be32_to_cpu(db->len);
2254                 sge = riu->sge;
2255                 k = 0;
2256
2257                 while (rsize > 0 && tsize > 0) {
2258                         sge->addr = dma_addr;
2259                         sge->lkey = ch->sport->sdev->mr->lkey;
2260
2261                         if (rsize >= dma_len) {
2262                                 sge->length =
2263                                         (tsize < dma_len) ? tsize : dma_len;
2264                                 tsize -= dma_len;
2265                                 rsize -= dma_len;
2266
2267                                 if (tsize > 0) {
2268                                         ++j;
2269                                         if (j < count) {
2270                                                 dma_len = sg_dma_len(&scat[j]);
2271                                                 dma_addr =
2272                                                     sg_dma_address(&scat[j]);
2273                                         }
2274                                 }
2275                         } else {
2276                                 sge->length = (tsize < rsize) ? tsize : rsize;
2277                                 tsize -= rsize;
2278                                 dma_len -= rsize;
2279                                 dma_addr += rsize;
2280                                 rsize = 0;
2281                         }
2282
2283                         ++k;
2284                         if (k == riu->sge_cnt && rsize > 0) {
2285                                 ++riu;
2286                                 sge = riu->sge;
2287                                 k = 0;
2288                         } else if (rsize > 0)
2289                                 ++sge;
2290                 }
2291         }
2292
2293         return 0;
2294
2295 free_mem:
2296         srpt_unmap_sg_to_ib_sge(ch, ioctx);
2297
2298         return -ENOMEM;
2299 }
2300
2301 static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
2302                                     struct srpt_ioctx *ioctx)
2303 {
2304         struct scst_cmd *scmnd;
2305         struct scatterlist *scat;
2306         scst_data_direction dir;
2307
2308         TRACE_ENTRY();
2309
2310         scmnd = ioctx->scmnd;
2311         BUG_ON(!scmnd);
2312         BUG_ON(ioctx != scst_cmd_get_tgt_priv(scmnd));
2313         scat = scst_cmd_get_sg(scmnd);
2314
2315         TRACE_DBG("n_rdma = %d; rdma_ius = %p; scat = %p\n",
2316                   ioctx->n_rdma, ioctx->rdma_ius, scat);
2317
2318         BUG_ON(ioctx->n_rdma && !ioctx->rdma_ius);
2319
2320         while (ioctx->n_rdma)
2321                 kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge);
2322
2323         kfree(ioctx->rdma_ius);
2324         ioctx->rdma_ius = NULL;
2325
2326         if (scat) {
2327                 dir = scst_cmd_get_data_direction(scmnd);
2328                 ib_dma_unmap_sg(ch->sport->sdev->device,
2329                                 scat, scst_cmd_get_sg_cnt(scmnd),
2330                                 scst_to_tgt_dma_dir(dir));
2331         }
2332
2333         TRACE_EXIT();
2334 }
2335
2336 static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
2337                               scst_data_direction dir)
2338 {
2339         struct ib_send_wr wr;
2340         struct ib_send_wr *bad_wr;
2341         struct rdma_iu *riu;
2342         int i;
2343         int ret;
2344         int srq_wr_avail;
2345
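             /*
              * The RDMA read work requests of a write command consume send
              * queue slots: reserve ioctx->n_rdma slots from ch->qp_wr_avail
              * up front. The slots are given back immediately if the
              * reservation fails, or by the completion handler after the
              * last RDMA read has completed.
              */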
2346         if (dir == SCST_DATA_WRITE) {
2347                 ret = -ENOMEM;
2348                 srq_wr_avail = atomic_sub_return(ioctx->n_rdma,
2349                                                  &ch->qp_wr_avail);
2350                 if (srq_wr_avail < 0) {
2351                         atomic_add(ioctx->n_rdma, &ch->qp_wr_avail);
2352                         PRINT_INFO("%s[%d]: send queue full", __func__, __LINE__);
2353                         goto out;
2354                 }
2355         }
2356
2357         ret = 0;
2358         riu = ioctx->rdma_ius;
2359         memset(&wr, 0, sizeof wr);
2360
2361         for (i = 0; i < ioctx->n_rdma; ++i, ++riu) {
2362                 wr.opcode = (dir == SCST_DATA_READ) ?
2363                     IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
2364                 wr.next = NULL;
2365                 wr.wr_id = ioctx->index;
2366                 wr.wr.rdma.remote_addr = riu->raddr;
2367                 wr.wr.rdma.rkey = riu->rkey;
2368                 wr.num_sge = riu->sge_cnt;
2369                 wr.sg_list = riu->sge;
2370
2371                 /* Signal a completion only for the last RDMA WR of a write command. */
2372                 if (i == (ioctx->n_rdma - 1) && dir == SCST_DATA_WRITE)
2373                         wr.send_flags = IB_SEND_SIGNALED;
2374
2375                 ret = ib_post_send(ch->qp, &wr, &bad_wr);
2376                 if (ret)
2377                         goto out;
2378         }
2379
2380 out:
2381         return ret;
2382 }
2383
2384 /*
2385  * Start data transfer between initiator and target. Must not block.
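      * Returns one of the SCST_TGT_RES_* status codes.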
2386  */
2387 static int srpt_xfer_data(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
2388                           struct scst_cmd *scmnd)
2389 {
2390         int ret;
2391
2392         ret = srpt_map_sg_to_ib_sge(ch, ioctx, scmnd);
2393         if (ret) {
2394                 PRINT_ERROR("%s[%d] ret=%d", __func__, __LINE__, ret);
2395                 ret = SCST_TGT_RES_QUEUE_FULL;
2396                 goto out;
2397         }
2398
2399         ret = srpt_perform_rdmas(ch, ioctx, scst_cmd_get_data_direction(scmnd));
2400         if (ret) {
2401                 if (ret == -EAGAIN || ret == -ENOMEM) {
2402                         PRINT_INFO("%s[%d] queue full -- ret=%d",
2403                                    __func__, __LINE__, ret);
2404                         ret = SCST_TGT_RES_QUEUE_FULL;
2405                 } else {
2406                         PRINT_ERROR("%s[%d] fatal error -- ret=%d",
2407                                     __func__, __LINE__, ret);
2408                         ret = SCST_TGT_RES_FATAL_ERROR;
2409                 }
2410                 goto out_unmap;
2411         }
2412
2413         ret = SCST_TGT_RES_SUCCESS;
2414
2415 out:
2416         return ret;
2417 out_unmap:
2418         srpt_unmap_sg_to_ib_sge(ch, ioctx);
2419         goto out;
2420 }
2421
2422 /*
2423  * Called by the SCST core to inform ib_srpt that data reception from the
2424  * initiator should start (SCST_DATA_WRITE). Must not block.
2425  */
2426 static int srpt_rdy_to_xfer(struct scst_cmd *scmnd)
2427 {
2428         struct srpt_rdma_ch *ch;
2429         struct srpt_ioctx *ioctx;
2430         enum rdma_ch_state ch_state;
2431         int ret;
2432
2433         ioctx = scst_cmd_get_tgt_priv(scmnd);
2434         BUG_ON(!ioctx);
2435
2436         WARN_ON(srpt_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA)
2437                 == SRPT_STATE_ABORTED);
2438
2439         ch = ioctx->ch;
2440         WARN_ON(ch != scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd)));
2441         BUG_ON(!ch);
2442
2443         ch_state = atomic_read(&ch->state);
2444         if (ch_state == RDMA_CHANNEL_DISCONNECTING) {
2445                 TRACE_DBG("cmd with tag %lld: channel disconnecting",
2446                           scst_cmd_get_tag(scmnd));
2447                 ret = SCST_TGT_RES_FATAL_ERROR;
2448                 goto out;
2449         } else if (ch_state == RDMA_CHANNEL_CONNECTING) {
2450                 ret = SCST_TGT_RES_QUEUE_FULL;
2451                 goto out;
2452         }
2453         ret = srpt_xfer_data(ch, ioctx, scmnd);
2454
2455 out:
2456         return ret;
2457 }
2458
2459 /*
2460  * Called by the SCST core. Transmits the response buffer and status held in
2461  * 'scmnd'. Must not block.
2462  */
2463 static int srpt_xmit_response(struct scst_cmd *scmnd)
2464 {
2465         struct srpt_rdma_ch *ch;
2466         struct srpt_ioctx *ioctx;
2467         struct srp_rsp *srp_rsp;
2468         u64 tag;
2469         int ret = SCST_TGT_RES_SUCCESS;
2470         int dir;
2471         int status;
2472
2473         ioctx = scst_cmd_get_tgt_priv(scmnd);
2474         BUG_ON(!ioctx);
2475
2476         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2477         BUG_ON(!ch);
2478
2479         if (unlikely(scst_cmd_aborted(scmnd))) {
2480                 TRACE_DBG("cmd with tag %lld has been aborted",
2481                           scst_cmd_get_tag(scmnd));
2482                 srpt_abort_scst_cmd(ch->sport->sdev, scmnd);
2483                 ret = SCST_TGT_RES_SUCCESS;
2484                 goto out;
2485         }
2486
2487         if (srpt_set_cmd_state(ioctx, SRPT_STATE_PROCESSED)
2488             == SRPT_STATE_ABORTED) {
2489                 ret = SCST_TGT_RES_SUCCESS;
2490                 goto out;
2491         }
2492
2493         tag = scst_cmd_get_tag(scmnd);
2494         dir = scst_cmd_get_data_direction(scmnd);
2495         status = scst_cmd_get_status(scmnd) & 0xff;
2496
2497         srpt_build_cmd_rsp(ch, ioctx, NO_SENSE, NO_ADD_SENSE, tag);
2498
2499         srp_rsp = ioctx->buf;
2500
2501         if (SCST_SENSE_VALID(scst_cmd_get_sense_buffer(scmnd))) {
2502                 unsigned int max_sense_len;
2503
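                     /*
                      * sense_data_len is kept in CPU byte order while the
                      * sense data is copied and possibly truncated; it is
                      * converted to big endian just before the response is
                      * posted.
                      */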
2504                 srp_rsp->sense_data_len = scst_cmd_get_sense_buffer_len(scmnd);
2505                 BUILD_BUG_ON(MIN_MAX_MESSAGE_SIZE <= sizeof(*srp_rsp));
2506                 WARN_ON(srp_max_message_size <= sizeof(*srp_rsp));
2507                 max_sense_len = srp_max_message_size - sizeof(*srp_rsp);
2508                 if (srp_rsp->sense_data_len > max_sense_len) {
2509                         PRINT_WARNING("truncated sense data from %d to %d"
2510                                 " bytes", srp_rsp->sense_data_len,
2511                                 max_sense_len);
2512                         srp_rsp->sense_data_len = max_sense_len;
2513                 }
2514
2515                 memcpy((u8 *) (srp_rsp + 1), scst_cmd_get_sense_buffer(scmnd),
2516                        srp_rsp->sense_data_len);
2517
2518                 srp_rsp->sense_data_len = cpu_to_be32(srp_rsp->sense_data_len);
2519                 srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
2520
2521                 if (!status)
2522                         status = SAM_STAT_CHECK_CONDITION;
2523         }
2524
2525         srp_rsp->status = status;
2526
2527         /* For read commands, transfer the data to the initiator. */
2528         if (dir == SCST_DATA_READ && scst_cmd_get_resp_data_len(scmnd)) {
2529                 ret = srpt_xfer_data(ch, ioctx, scmnd);
2530                 if (ret != SCST_TGT_RES_SUCCESS) {
2531                         PRINT_ERROR("%s: tag= %lld xfer_data failed",
2532                                     __func__, (unsigned long long)tag);
2533                         goto out;
2534                 }
2535         }
2536
2537         if (srpt_post_send(ch, ioctx,
2538                            sizeof *srp_rsp +
2539                            be32_to_cpu(srp_rsp->sense_data_len))) {
2540                 PRINT_ERROR("%s[%d]: ch->state= %d tag= %lld",
2541                             __func__, __LINE__, atomic_read(&ch->state),
2542                             (unsigned long long)tag);
2543                 ret = SCST_TGT_RES_FATAL_ERROR;
2544         }
2545
2546 out:
2547         return ret;
2548 }
2549
2550 /*
2551  * Called by the SCST core to inform ib_srpt that a received task management
2552  * function has been completed. Must not block.
2553  */
2554 static void srpt_tsk_mgmt_done(struct scst_mgmt_cmd *mcmnd)
2555 {
2556         struct srpt_rdma_ch *ch;
2557         struct srpt_mgmt_ioctx *mgmt_ioctx;
2558         struct srpt_ioctx *ioctx;
2559         int rsp_len;
2560
2561         mgmt_ioctx = scst_mgmt_cmd_get_tgt_priv(mcmnd);
2562         BUG_ON(!mgmt_ioctx);
2563
2564         ch = mgmt_ioctx->ch;
2565         BUG_ON(!ch);
2566
2567         ioctx = mgmt_ioctx->ioctx;
2568         BUG_ON(!ioctx);
2569
2570         TRACE_DBG("%s: tsk_mgmt_done for tag= %lld status=%d",
2571                   __func__, (unsigned long long)mgmt_ioctx->tag,
2572                   scst_mgmt_cmd_get_status(mcmnd));
2573
2574         if (srpt_set_cmd_state(ioctx, SRPT_STATE_PROCESSED)
2575             == SRPT_STATE_ABORTED)
2576                 goto out;
2577
2578         rsp_len = srpt_build_tskmgmt_rsp(ch, ioctx,
2579                                          (scst_mgmt_cmd_get_status(mcmnd) ==
2580                                           SCST_MGMT_STATUS_SUCCESS) ?
2581                                          SRP_TSK_MGMT_SUCCESS :
2582                                          SRP_TSK_MGMT_FAILED,
2583                                          mgmt_ioctx->tag);
2584         srpt_post_send(ch, ioctx, rsp_len);
2585
2586         scst_mgmt_cmd_set_tgt_priv(mcmnd, NULL);
2587
2588         kfree(mgmt_ioctx);
2589
2590 out:
2591         ;
2592 }
2593
2594 /*
2595  * Called by the SCST core to inform ib_srpt that the command 'scmnd' is about
2596  * to be freed. May be called in IRQ context.
2597  */
2598 static void srpt_on_free_cmd(struct scst_cmd *scmnd)
2599 {
2600         struct srpt_rdma_ch *ch;
2601         struct srpt_ioctx *ioctx;
2602
2603         ioctx = scst_cmd_get_tgt_priv(scmnd);
2604         BUG_ON(!ioctx);
2605
2606         srpt_set_cmd_state(ioctx, SRPT_STATE_ABORTED);
2607         ch = ioctx->ch;
2608         BUG_ON(!ch);
2609         ioctx->ch = NULL;
2610
2611         srpt_reset_ioctx(ch, ioctx);
2612         scst_cmd_set_tgt_priv(scmnd, NULL);
2613 }
2614
2615 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2616 /* A vanilla 2.6.19 or older kernel without backported OFED kernel headers. */
2617 static void srpt_refresh_port_work(void *ctx)
2618 #else
2619 static void srpt_refresh_port_work(struct work_struct *work)
2620 #endif
2621 {
2622 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2623         struct srpt_port *sport = (struct srpt_port *)ctx;
2624 #else
2625         struct srpt_port *sport = container_of(work, struct srpt_port, work);
2626 #endif
2627
2628         srpt_refresh_port(sport);
2629 }
2630
2631 /*
2632  * Called by the SCST core to detect target adapters. Returns the number of
2633  * detected target adapters.
2634  */
2635 static int srpt_detect(struct scst_tgt_template *tp)
2636 {
2637         int device_count;
2638
2639         TRACE_ENTRY();
2640
2641         device_count = atomic_read(&srpt_device_count);
2642
2643         TRACE_EXIT_RES(device_count);
2644
2645         return device_count;
2646 }
2647
2648 /*
2649  * Callback function called by the SCST core from scst_unregister() to free up
2650  * the resources associated with device scst_tgt.
2651  */
2652 static int srpt_release(struct scst_tgt *scst_tgt)
2653 {
2654         struct srpt_device *sdev = scst_tgt_get_tgt_priv(scst_tgt);
2655         struct srpt_rdma_ch *ch, *tmp_ch;
2656
2657         TRACE_ENTRY();
2658
2659         BUG_ON(!scst_tgt);
2660 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
2661         WARN_ON(!sdev);
2662         if (!sdev)
2663                 return -ENODEV;
2664 #else
2665         if (WARN_ON(!sdev))
2666                 return -ENODEV;
2667 #endif
2668
2669 #ifdef CONFIG_SCST_PROC
2670         srpt_unregister_procfs_entry(scst_tgt->tgtt);
2671 #endif /*CONFIG_SCST_PROC*/
2672
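             /*
              * Disconnect and unregister every remaining channel. The
              * sdev->spinlock is dropped around ib_send_cm_dreq() and
              * scst_unregister_session() since these calls may sleep.
              */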
2673         spin_lock_irq(&sdev->spinlock);
2674         list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
2675                 list_del(&ch->list);
2676                 atomic_set(&ch->state, RDMA_CHANNEL_DISCONNECTING);
2677                 spin_unlock_irq(&sdev->spinlock);
2678                 ib_send_cm_dreq(ch->cm_id, NULL, 0);
2679                 scst_unregister_session(ch->scst_sess, true,
2680                                         srpt_release_channel);
2681                 spin_lock_irq(&sdev->spinlock);
2682         }
2683         spin_unlock_irq(&sdev->spinlock);
2684
2685         srpt_unregister_mad_agent(sdev);
2686
2687         scst_tgt_set_tgt_priv(scst_tgt, NULL);
2688
2689         TRACE_EXIT();
2690
2691         return 0;
2692 }
2693
2694 /*
2695  * Entry point for ib_srpt's kernel thread. This kernel thread is only created
2696  * when the module parameter 'thread' is not zero (the default is zero).
2697  * This thread processes the ioctx list srpt_thread.thread_ioctx_list.
2698  *
2699  * @pre thread != 0
2700  */
2701 static int srpt_ioctx_thread(void *arg)
2702 {
2703         struct srpt_ioctx *ioctx;
2704
2705         /* Hibernation / freezing of the SRPT kernel thread is not supported. */
2706         current->flags |= PF_NOFREEZE;
2707
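             /*
              * Open-coded wait_event() loop: sleep on ioctx_list_waitQ until
              * an ioctx has been queued or kthread_should_stop() returns
              * true. srpt_thread.thread_lock is dropped while sleeping.
              */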
2708         spin_lock_irq(&srpt_thread.thread_lock);
2709         while (!kthread_should_stop()) {
2710                 wait_queue_t wait;
2711                 init_waitqueue_entry(&wait, current);
2712
2713                 if (!srpt_test_ioctx_list()) {
2714                         add_wait_queue_exclusive(&ioctx_list_waitQ, &wait);
2715
2716                         for (;;) {
2717                                 set_current_state(TASK_INTERRUPTIBLE);
2718                                 if (srpt_test_ioctx_list())
2719                                         break;
2720                                 spin_unlock_irq(&srpt_thread.thread_lock);
2721                                 schedule();
2722                                 spin_lock_irq(&srpt_thread.thread_lock);
2723                         }
2724                         set_current_state(TASK_RUNNING);
2725                         remove_wait_queue(&ioctx_list_waitQ, &wait);
2726                 }
2727
2728                 while (!list_empty(&srpt_thread.thread_ioctx_list)) {
2729                         ioctx = list_entry(srpt_thread.thread_ioctx_list.next,
2730                                            struct srpt_ioctx, comp_list);
2731
2732                         list_del(&ioctx->comp_list);
2733
2734                         spin_unlock_irq(&srpt_thread.thread_lock);
2735                         switch (ioctx->op) {
2736                         case IB_WC_SEND:
2737                                 srpt_handle_send_comp(ioctx->ch, ioctx,
2738                                         SCST_CONTEXT_DIRECT);
2739                                 break;
2740                         case IB_WC_RDMA_WRITE:
2741                         case IB_WC_RDMA_READ:
2742                                 srpt_handle_rdma_comp(ioctx->ch, ioctx);
2743                                 break;
2744                         case IB_WC_RECV:
2745                                 srpt_handle_new_iu(ioctx->ch, ioctx);
2746                                 break;
2747                         default:
2748                                 break;
2749                         }
2750 #if defined(CONFIG_SCST_DEBUG)
2751                         if (thread_processing_delay_in_us
2752                             <= MAX_UDELAY_MS * 1000)
2753                                 udelay(thread_processing_delay_in_us);
2754 #endif
2755                         spin_lock_irq(&srpt_thread.thread_lock);
2756                 }
2757         }
2758         spin_unlock_irq(&srpt_thread.thread_lock);
2759
2760         return 0;
2761 }
2762
2763 /* SCST target template for the SRP target implementation. */
2764 static struct scst_tgt_template srpt_template = {
2765         .name = DRV_NAME,
2766         .sg_tablesize = SRPT_DEF_SG_TABLESIZE,
2767         .xmit_response_atomic = 1,
2768         .rdy_to_xfer_atomic = 1,
2769         .detect = srpt_detect,
2770         .release = srpt_release,
2771         .xmit_response = srpt_xmit_response,
2772         .rdy_to_xfer = srpt_rdy_to_xfer,
2773         .on_free_cmd = srpt_on_free_cmd,
2774         .task_mgmt_fn_done = srpt_tsk_mgmt_done
2775 };
2776
2777 /*
2778  * The callback function srpt_release_class_dev() is called whenever a
2779  * device is removed from the /sys/class/infiniband_srpt device class.
2780  * Although this function has been left empty, a release function has been
2781  * defined such that upon module removal no complaint is logged about a
2782  * missing release function.
2783  */
2784 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2785 static void srpt_release_class_dev(struct class_device *class_dev)
2786 #else
2787 static void srpt_release_class_dev(struct device *dev)
2788 #endif
2789 {
2790 }
2791
2792 #ifdef CONFIG_SCST_PROC
2793
2794 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
2795 static int srpt_trace_level_show(struct seq_file *seq, void *v)
2796 {
2797         return scst_proc_log_entry_read(seq, trace_flag, NULL);
2798 }
2799
2800 static ssize_t srpt_proc_trace_level_write(struct file *file,
2801         const char __user *buf, size_t length, loff_t *off)
2802 {
2803         return scst_proc_log_entry_write(file, buf, length, &trace_flag,
2804                 DEFAULT_SRPT_TRACE_FLAGS, NULL);
2805 }
2806
2807 static struct scst_proc_data srpt_log_proc_data = {
2808         SCST_DEF_RW_SEQ_OP(srpt_proc_trace_level_write)
2809         .show = srpt_trace_level_show,
2810 };
2811 #endif
2812
2813 #endif /* CONFIG_SCST_PROC */
2814
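     /*
      * sysfs 'login_info' attribute: prints, one line per port, the login
      * parameters of this target in key=value form (tid_ext, ioc_guid, pkey,
      * dgid and service_id), as formatted by the sprintf() call below.
      */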
2815 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2816 static ssize_t show_login_info(struct class_device *class_dev, char *buf)
2817 #else
2818 static ssize_t show_login_info(struct device *dev,
2819                                struct device_attribute *attr, char *buf)
2820 #endif
2821 {
2822         struct srpt_device *sdev =
2823 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2824                 container_of(class_dev, struct srpt_device, class_dev);
2825 #else
2826                 container_of(dev, struct srpt_device, dev);
2827 #endif
2828         struct srpt_port *sport;
2829         int i;
2830         int len = 0;
2831
2832         for (i = 0; i < sdev->device->phys_port_cnt; i++) {
2833                 sport = &sdev->port[i];
2834
2835                 len += sprintf(buf + len,
2836                                "tid_ext=%016llx,ioc_guid=%016llx,pkey=ffff,"
2837                                "dgid=%04x%04x%04x%04x%04x%04x%04x%04x,"
2838                                "service_id=%016llx\n",
2839                                (unsigned long long) srpt_service_guid,
2840                                (unsigned long long) srpt_service_guid,
2841                                be16_to_cpu(((__be16 *) sport->gid.raw)[0]),
2842                                be16_to_cpu(((__be16 *) sport->gid.raw)[1]),
2843                                be16_to_cpu(((__be16 *) sport->gid.raw)[2]),
2844                                be16_to_cpu(((__be16 *) sport->gid.raw)[3]),
2845                                be16_to_cpu(((__be16 *) sport->gid.raw)[4]),
2846                                be16_to_cpu(((__be16 *) sport->gid.raw)[5]),
2847                                be16_to_cpu(((__be16 *) sport->gid.raw)[6]),
2848                                be16_to_cpu(((__be16 *) sport->gid.raw)[7]),
2849                                (unsigned long long) srpt_service_guid);
2850         }
2851
2852         return len;
2853 }
2854
2855 static struct class_attribute srpt_class_attrs[] = {
2856         __ATTR_NULL,
2857 };
2858
2859 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2860 static struct class_device_attribute srpt_dev_attrs[] = {
2861 #else
2862 static struct device_attribute srpt_dev_attrs[] = {
2863 #endif
2864         __ATTR(login_info, S_IRUGO, show_login_info, NULL),
2865         __ATTR_NULL,
2866 };
2867
2868 static struct class srpt_class = {
2869         .name        = "infiniband_srpt",
2870 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2871         .release = srpt_release_class_dev,
2872 #else
2873         .dev_release = srpt_release_class_dev,
2874 #endif
2875         .class_attrs = srpt_class_attrs,
2876 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2877         .class_dev_attrs = srpt_dev_attrs,
2878 #else
2879         .dev_attrs   = srpt_dev_attrs,
2880 #endif
2881 };
2882
2883 /*
2884  * Callback function called by the InfiniBand core either when a new
2885  * InfiniBand device is added or, during the ib_register_client() call,
2886  * once for each already registered InfiniBand device.
2887  */
static void srpt_add_one(struct ib_device *device)
{
        struct srpt_device *sdev;
        struct srpt_port *sport;
        struct ib_srq_init_attr srq_attr;
        int i;

        TRACE_ENTRY();

        TRACE_DBG("device = %p, device->dma_ops = %p", device, device->dma_ops);

        sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
        if (!sdev)
                return;

        sdev->device = device;

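        /*
         * Make this HCA visible in sysfs as class device
         * infiniband_srpt/srpt-<device name>. Which API has to be used for
         * this depends on the kernel version, hence the #if's below.
         */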
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
        sdev->class_dev.class = &srpt_class;
        sdev->class_dev.dev = device->dma_device;
        snprintf(sdev->class_dev.class_id, BUS_ID_SIZE,
                 "srpt-%s", device->name);
#else
        sdev->dev.class = &srpt_class;
        sdev->dev.parent = device->dma_device;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30)
        snprintf(sdev->dev.bus_id, BUS_ID_SIZE, "srpt-%s", device->name);
#else
        dev_set_name(&sdev->dev, "srpt-%s", device->name);
#endif
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
        if (class_device_register(&sdev->class_dev))
                goto free_dev;
#else
        if (device_register(&sdev->dev))
                goto free_dev;
#endif

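        /*
         * Query the HCA attributes -- dev_attr.max_srq_wr is needed below to
         * size the shared receive queue.
         */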
        if (ib_query_device(device, &sdev->dev_attr))
                goto err_dev;

        sdev->pd = ib_alloc_pd(device);
        if (IS_ERR(sdev->pd))
                goto err_dev;

        sdev->mr = ib_get_dma_mr(sdev->pd, IB_ACCESS_LOCAL_WRITE);
        if (IS_ERR(sdev->mr))
                goto err_pd;

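        /*
         * Create a single shared receive queue (SRQ) per HCA. All receive
         * buffers (I/O contexts) are posted on this SRQ, so all channels of
         * the HCA share one ring of receive buffers.
         */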
        srq_attr.event_handler = srpt_srq_event;
        srq_attr.srq_context = (void *)sdev;
        srq_attr.attr.max_wr = min(SRPT_SRQ_SIZE, sdev->dev_attr.max_srq_wr);
        srq_attr.attr.max_sge = 1;
        srq_attr.attr.srq_limit = 0;

        sdev->srq = ib_create_srq(sdev->pd, &srq_attr);
        if (IS_ERR(sdev->srq))
                goto err_mr;

        TRACE_DBG("%s: create SRQ #wr=%d max_allow=%d dev=%s",
                  __func__, srq_attr.attr.max_wr,
                  sdev->dev_attr.max_srq_wr, device->name);

        if (!srpt_service_guid)
                srpt_service_guid = be64_to_cpu(device->node_guid);

        sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev);
        if (IS_ERR(sdev->cm_id))
                goto err_srq;

        /* Print out the target login information. */
        TRACE_DBG("Target login info: id_ext=%016llx,"
                  "ioc_guid=%016llx,pkey=ffff,service_id=%016llx",
                  (unsigned long long) srpt_service_guid,
                  (unsigned long long) srpt_service_guid,
                  (unsigned long long) srpt_service_guid);

        /*
         * We do not have a consistent service_id (which is also used as the
         * id_ext of the target_id) to identify this target. We currently use
         * the GUID of the first HCA in the system as service_id; therefore,
         * the target_id will change if this HCA fails and is replaced by a
         * different HCA.
         */
        if (ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid), 0, NULL))
                goto err_cm;

        INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device,
                              srpt_event_handler);
        if (ib_register_event_handler(&sdev->event_handler))
                goto err_cm;

        if (srpt_alloc_ioctx_ring(sdev))
                goto err_event;

        INIT_LIST_HEAD(&sdev->rch_list);
        spin_lock_init(&sdev->spinlock);

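        /*
         * Pre-post all receive I/O contexts on the SRQ so that incoming SRP
         * information units can be received immediately.
         */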
        for (i = 0; i < SRPT_SRQ_SIZE; ++i)
                srpt_post_recv(sdev, sdev->ioctx_ring[i]);

        ib_set_client_data(device, &srpt_client, sdev);

        sdev->scst_tgt = scst_register(&srpt_template, NULL);
        if (!sdev->scst_tgt) {
                PRINT_ERROR("SCST registration failed for %s.",
                            sdev->device->name);
                goto err_ring;
        }

        scst_tgt_set_tgt_priv(sdev->scst_tgt, sdev);

        WARN_ON(sdev->device->phys_port_cnt > ARRAY_SIZE(sdev->port));

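        /* Note: InfiniBand port numbers start at one, not zero. */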
        for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
                sport = &sdev->port[i - 1];
                sport->sdev = sdev;
                sport->port = i;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && !defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
                /*
                 * A vanilla 2.6.19 or older kernel without backported OFED
                 * kernel headers.
                 */
                INIT_WORK(&sport->work, srpt_refresh_port_work, sport);
#else
                INIT_WORK(&sport->work, srpt_refresh_port_work);
#endif
                if (srpt_refresh_port(sport)) {
                        PRINT_ERROR("MAD registration failed for %s-%d.",
                                    sdev->device->name, i);
                        goto err_refresh_port;
                }
        }

        atomic_inc(&srpt_device_count);

        TRACE_EXIT();

        return;

err_refresh_port:
        scst_unregister(sdev->scst_tgt);
err_ring:
        ib_set_client_data(device, &srpt_client, NULL);
        srpt_free_ioctx_ring(sdev);
err_event:
        ib_unregister_event_handler(&sdev->event_handler);
err_cm:
        ib_destroy_cm_id(sdev->cm_id);
err_srq:
        ib_destroy_srq(sdev->srq);
err_mr:
        ib_dereg_mr(sdev->mr);
err_pd:
        ib_dealloc_pd(sdev->pd);
err_dev:
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
        class_device_unregister(&sdev->class_dev);
#else
        device_unregister(&sdev->dev);
#endif
free_dev:
        kfree(sdev);

        TRACE_EXIT();
}

/*
 * Callback function called by the InfiniBand core either when an InfiniBand
 * device is being removed or, from inside ib_unregister_client(), once for
 * each InfiniBand device that is still registered.
 */
static void srpt_remove_one(struct ib_device *device)
{
        int i;
        struct srpt_device *sdev;

        TRACE_ENTRY();

        sdev = ib_get_client_data(device, &srpt_client);
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
        WARN_ON(!sdev);
        if (!sdev)
                return;
#else
        if (WARN_ON(!sdev))
                return;
#endif

        /*
         * Cancel the work if it is queued. Wait until srpt_refresh_port_work()
         * has finished if it is running.
         */
        for (i = 0; i < sdev->device->phys_port_cnt; i++)
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 22)
                cancel_work_sync(&sdev->port[i].work);
#else
                /*
                 * cancel_work_sync() was introduced in kernel 2.6.22. Older
                 * kernels do not have a facility to cancel scheduled work.
                 */
                PRINT_ERROR("%s",
                       "your kernel does not provide cancel_work_sync().");
#endif

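        /*
         * Unregister from SCST before destroying the IB resources below such
         * that no new SCSI commands are queued while the target is being
         * torn down.
         */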
        scst_unregister(sdev->scst_tgt);
        sdev->scst_tgt = NULL;

        ib_unregister_event_handler(&sdev->event_handler);
        ib_destroy_cm_id(sdev->cm_id);
        ib_destroy_srq(sdev->srq);
        ib_dereg_mr(sdev->mr);
        ib_dealloc_pd(sdev->pd);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
        class_device_unregister(&sdev->class_dev);
#else
        device_unregister(&sdev->dev);
#endif

        srpt_free_ioctx_ring(sdev);
        kfree(sdev);

        TRACE_EXIT();
}

#ifdef CONFIG_SCST_PROC

/**
 * srpt_register_procfs_entry() - Create procfs entries for srpt.
 *
 * Currently the only procfs entry created by this function is the
 * "trace_level" entry.
 */
static int srpt_register_procfs_entry(struct scst_tgt_template *tgt)
{
        int res = 0;
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
        struct proc_dir_entry *p, *root;

        root = scst_proc_get_tgt_root(tgt);
        WARN_ON(!root);
        if (root) {
                /*
                 * Fill in the scst_proc_data::data pointer, which is used in
                 * a printk(KERN_INFO ...) statement in
                 * scst_proc_log_entry_write() in scst_proc.c.
                 */
                srpt_log_proc_data.data = (char *)tgt->name;
                p = scst_create_proc_entry(root, SRPT_PROC_TRACE_LEVEL_NAME,
                                           &srpt_log_proc_data);
                if (!p)
                        res = -ENOMEM;
        } else {
                res = -ENOMEM;
        }
#endif
        return res;
}

static void srpt_unregister_procfs_entry(struct scst_tgt_template *tgt)
{
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
        struct proc_dir_entry *root;

        root = scst_proc_get_tgt_root(tgt);
        WARN_ON(!root);
        if (root)
                remove_proc_entry(SRPT_PROC_TRACE_LEVEL_NAME, root);
#endif
}

#endif /* CONFIG_SCST_PROC */

/*
 * Module initialization.
 *
 * Note: since ib_register_client() registers callback functions, and since
 * at least one of these callback functions (srpt_add_one()) calls SCST
 * functions, the SCST target template must be registered before
 * ib_register_client() is called.
 */
static int __init srpt_init_module(void)
{
        int ret;

        ret = -EINVAL;
        if (srp_max_message_size < MIN_MAX_MESSAGE_SIZE) {
                PRINT_ERROR("invalid value %d for kernel module parameter"
                            " srp_max_message_size -- must be at least %d.",
                            srp_max_message_size,
                            MIN_MAX_MESSAGE_SIZE);
                goto out;
        }

        ret = class_register(&srpt_class);
        if (ret) {
                PRINT_ERROR("%s", "couldn't register class infiniband_srpt");
                goto out;
        }

        ret = scst_register_target_template(&srpt_template);
        if (ret < 0) {
                PRINT_ERROR("%s", "couldn't register with scst");
                ret = -ENODEV;
                goto out_unregister_class;
        }

#ifdef CONFIG_SCST_PROC
        ret = srpt_register_procfs_entry(&srpt_template);
        if (ret) {
                PRINT_ERROR("%s", "couldn't register procfs entry");
                goto out_unregister_target;
        }
#endif /* CONFIG_SCST_PROC */

        ret = ib_register_client(&srpt_client);
        if (ret) {
                PRINT_ERROR("%s", "couldn't register IB client");
                goto out_unregister_target;
        }

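        /*
         * If requested via the "thread" kernel module parameter, start the
         * kernel thread that processes I/O contexts. If starting that thread
         * fails, clear "thread" such that I/O contexts are processed without
         * a kernel thread.
         */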
        if (thread) {
                spin_lock_init(&srpt_thread.thread_lock);
                INIT_LIST_HEAD(&srpt_thread.thread_ioctx_list);
                srpt_thread.thread = kthread_run(srpt_ioctx_thread,
                                                 NULL, "srpt_thread");
                if (IS_ERR(srpt_thread.thread)) {
                        PRINT_ERROR("%s", "starting srpt_thread failed -- "
                                    "continuing without a kernel thread");
                        srpt_thread.thread = NULL;
                        thread = 0;
                }
        }

        return 0;

out_unregister_target:
#ifdef CONFIG_SCST_PROC
        /*
         * Note: the procfs entry is unregistered in srpt_release(), which is
         * called by scst_unregister_target_template().
         */
#endif /* CONFIG_SCST_PROC */
        scst_unregister_target_template(&srpt_template);
out_unregister_class:
        class_unregister(&srpt_class);
out:
        return ret;
}

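/*
 * Module cleanup. Note: the unregistration order below is the reverse of the
 * registration order in srpt_init_module().
 */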
static void __exit srpt_cleanup_module(void)
{
        TRACE_ENTRY();

        if (srpt_thread.thread)
                kthread_stop(srpt_thread.thread);
        ib_unregister_client(&srpt_client);
        scst_unregister_target_template(&srpt_template);
        class_unregister(&srpt_class);

        TRACE_EXIT();
}

module_init(srpt_init_module);
module_exit(srpt_cleanup_module);