/*
 * Copyright (c) 2006 - 2009 Mellanox Technology Inc.  All rights reserved.
 * Copyright (C) 2008 Vladislav Bolkhovitin <vst@vlnb.net>
 * Copyright (C) 2008 - 2009 Bart Van Assche <bart.vanassche@gmail.com>
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#include <asm/atomic.h>
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#endif
#include "ib_srpt.h"
#include "scst_debug.h"

#define CONFIG_SCST_PROC

/* Name of this kernel module. */
#define DRV_NAME		"ib_srpt"
/* Prefix for printk() kernel messages. */
#define LOG_PFX			DRV_NAME ": "
#define DRV_VERSION		"1.0.1"
#define DRV_RELDATE		"July 10, 2008"
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
/* Flags to be used in SCST debug tracing statements. */
#define DEFAULT_SRPT_TRACE_FLAGS (TRACE_OUT_OF_MEM | TRACE_MINOR \
				  | TRACE_MGMT | TRACE_SPECIAL)
/* Name of the entry that will be created under /proc/scsi_tgt/ib_srpt. */
#define SRPT_PROC_TRACE_LEVEL_NAME	"trace_level"
#endif

#define MELLANOX_SRPT_ID_STRING	"SCST SRP target"

MODULE_AUTHOR("Vu Pham");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol target "
		   "v" DRV_VERSION " (" DRV_RELDATE ")");
MODULE_LICENSE("Dual BSD/GPL");

struct srpt_thread {
	/* Protects thread_ioctx_list. */
	spinlock_t thread_lock;
	/* I/O contexts to be processed by the kernel thread. */
	struct list_head thread_ioctx_list;
	/* SRPT kernel thread. */
	struct task_struct *thread;
};

/*
 * Global Variables
 */

static u64 srpt_service_guid;
/* Number of srpt_device structures. */
static atomic_t srpt_device_count;
static int use_port_guid_in_session_name;
static int thread = 1;
static struct srpt_thread srpt_thread;
static DECLARE_WAIT_QUEUE_HEAD(ioctx_list_waitQ);
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
static unsigned long trace_flag = DEFAULT_SRPT_TRACE_FLAGS;
module_param(trace_flag, long, 0644);
MODULE_PARM_DESC(trace_flag,
		 "Trace flags for the ib_srpt kernel module.");
#endif
#if defined(CONFIG_SCST_DEBUG)
static unsigned long interrupt_processing_delay_in_us;
module_param(interrupt_processing_delay_in_us, long, 0744);
MODULE_PARM_DESC(interrupt_processing_delay_in_us,
		 "CQ completion handler interrupt delay in microseconds.");
static unsigned long thread_processing_delay_in_us;
module_param(thread_processing_delay_in_us, long, 0744);
MODULE_PARM_DESC(thread_processing_delay_in_us,
		 "SRP thread processing delay in microseconds.");
#endif

module_param(thread, int, 0444);
MODULE_PARM_DESC(thread,
		 "Execute ioctx in thread context. Default 1; set to 0 to "
		 "execute in soft IRQ context, where possible.");

static unsigned int srp_max_rdma_size = 65536;
module_param(srp_max_rdma_size, int, 0744);
MODULE_PARM_DESC(srp_max_rdma_size,
		 "Maximum size of SRP RDMA transfers for new connections.");

static unsigned int srp_max_message_size = 4096;
module_param(srp_max_message_size, int, 0444);
MODULE_PARM_DESC(srp_max_message_size,
		 "Maximum size of SRP control messages in bytes.");

module_param(use_port_guid_in_session_name, bool, 0444);
MODULE_PARM_DESC(use_port_guid_in_session_name,
		 "Use target port ID in the SCST session name such that"
		 " redundant paths between multiport systems can be masked.");

static void srpt_add_one(struct ib_device *device);
static void srpt_remove_one(struct ib_device *device);
static void srpt_unregister_mad_agent(struct srpt_device *sdev);
#ifdef CONFIG_SCST_PROC
static void srpt_unregister_procfs_entry(struct scst_tgt_template *tgt);
#endif /*CONFIG_SCST_PROC*/
static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
				    struct srpt_ioctx *ioctx,
				    struct scst_cmd *scmnd);
static void srpt_release_channel(struct scst_session *scst_sess);

static struct ib_client srpt_client = {
	.name = DRV_NAME,
	.add = srpt_add_one,
	.remove = srpt_remove_one
};

/**
 * Atomically test and set the channel state.
 * @ch: RDMA channel.
 * @old: channel state to compare with.
 * @new: state to change the channel state to if the current state matches the
 *       argument 'old'.
 *
 * Returns the previous channel state.
 */
static enum rdma_ch_state
srpt_test_and_set_channel_state(struct srpt_rdma_ch *ch,
				enum rdma_ch_state old,
				enum rdma_ch_state new)
{
	return atomic_cmpxchg(&ch->state, old, new);
}
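
/*
 * Usage sketch (illustrative only, not part of the driver): the return value
 * tells the caller whether it won the race to perform a state transition,
 * e.g.:
 *
 *	if (srpt_test_and_set_channel_state(ch, RDMA_CHANNEL_LIVE,
 *		RDMA_CHANNEL_DISCONNECTING) == RDMA_CHANNEL_LIVE) {
 *		... only this thread performed LIVE -> DISCONNECTING ...
 *	}
 */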

/*
 * Callback function called by the InfiniBand core when an asynchronous IB
 * event occurs. This callback may occur in interrupt context. See also
 * section 11.5.2, Set Asynchronous Event Handler in the InfiniBand
 * Architecture Specification.
 */
static void srpt_event_handler(struct ib_event_handler *handler,
			       struct ib_event *event)
{
	struct srpt_device *sdev;
	struct srpt_port *sport;

	TRACE_ENTRY();

	sdev = ib_get_client_data(event->device, &srpt_client);
	if (!sdev || sdev->device != event->device)
		return;

	TRACE_DBG("ASYNC event= %d on device= %s",
		  event->event, sdev->device->name);

	switch (event->event) {
	case IB_EVENT_PORT_ERR:
		if (event->element.port_num <= sdev->device->phys_port_cnt) {
			sport = &sdev->port[event->element.port_num - 1];
			sport->lid = 0;
			sport->sm_lid = 0;
		}
		break;
	case IB_EVENT_PORT_ACTIVE:
	case IB_EVENT_LID_CHANGE:
	case IB_EVENT_PKEY_CHANGE:
	case IB_EVENT_SM_CHANGE:
	case IB_EVENT_CLIENT_REREGISTER:
		/*
		 * Refresh port data asynchronously. Note: it is safe to call
		 * schedule_work() even if &sport->work is already on the
		 * global workqueue because schedule_work() tests for the
		 * work_pending() condition before adding &sport->work to the
		 * global work queue.
		 */
		if (event->element.port_num <= sdev->device->phys_port_cnt) {
			sport = &sdev->port[event->element.port_num - 1];
			if (!sport->lid && !sport->sm_lid)
				schedule_work(&sport->work);
		}
		break;
	default:
		break;
	}

	TRACE_EXIT();
}

/*
 * Callback function called by the InfiniBand core for SRQ (shared receive
 * queue) events.
 */
static void srpt_srq_event(struct ib_event *event, void *ctx)
{
	TRACE_ENTRY();

	TRACE_DBG("SRQ event %d", event->event);

	TRACE_EXIT();
}

/*
 * Callback function called by the InfiniBand core for QP (queue pair) events.
 */
static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch)
{
	TRACE_DBG("QP event %d on cm_id=%p sess_name=%s state=%d",
		  event->event, ch->cm_id, ch->sess_name,
		  atomic_read(&ch->state));

	switch (event->event) {
	case IB_EVENT_COMM_EST:
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20) || defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
		ib_cm_notify(ch->cm_id, event->event);
#else
		/* Vanilla 2.6.19 kernel (or before) without OFED. */
		PRINT_ERROR("%s", "don't know how to perform ib_cm_notify()"
			    " on a vanilla 2.6.19 (or older) kernel.");
#endif
		break;
	case IB_EVENT_QP_LAST_WQE_REACHED:
		if (srpt_test_and_set_channel_state(ch, RDMA_CHANNEL_LIVE,
			RDMA_CHANNEL_DISCONNECTING) == RDMA_CHANNEL_LIVE) {
			PRINT_INFO("disconnected session %s.", ch->sess_name);
			ib_send_cm_dreq(ch->cm_id, NULL, 0);
		}
		break;
	default:
		break;
	}
}

/*
 * Helper function for filling in an InfiniBand IOUnitInfo structure. Copies
 * the lowest four bits of value into the four-bit element with index slot of
 * the array c_list (controller list). The index slot is one-based.
 *
 * @pre 1 <= slot && 0 <= value && value < 16
 */
static void srpt_set_ioc(u8 *c_list, u32 slot, u8 value)
{
	u16 id;
	u8 tmp;

	id = (slot - 1) / 2;
	if (slot & 0x1) {
		tmp = c_list[id] & 0xf;
		c_list[id] = (value << 4) | tmp;
	} else {
		tmp = c_list[id] & 0xf0;
		c_list[id] = (value & 0xf) | tmp;
	}
}
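
/*
 * Packing example (illustrative only): after srpt_set_ioc(c_list, 1, 1) and
 * srpt_set_ioc(c_list, 2, 0), c_list[0] equals 0x10 - the odd slot 1 occupies
 * the upper nibble and the even slot 2 the lower nibble of the first byte.
 */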

/*
 * Write InfiniBand ClassPortInfo to mad. See also section 16.3.3.1
 * ClassPortInfo in the InfiniBand Architecture Specification.
 */
static void srpt_get_class_port_info(struct ib_dm_mad *mad)
{
	struct ib_class_port_info *cif;

	cif = (struct ib_class_port_info *)mad->data;
	memset(cif, 0, sizeof *cif);
	cif->base_version = 1;
	cif->class_version = 1;
	cif->resp_time_value = 20;

	mad->mad_hdr.status = 0;
}

/*
 * Write IOUnitInfo to mad. See also section 16.3.3.3 IOUnitInfo in the
 * InfiniBand Architecture Specification. See also section B.7,
 * table B.6 in the T10 SRP r16a document.
 */
static void srpt_get_iou(struct ib_dm_mad *mad)
{
	struct ib_dm_iou_info *ioui;
	u8 slot;
	int i;

	ioui = (struct ib_dm_iou_info *)mad->data;
	ioui->change_id = 1;
	ioui->max_controllers = 16;

	/* set present for slot 1 and empty for the rest */
	srpt_set_ioc(ioui->controller_list, 1, 1);
	for (i = 1, slot = 2; i < 16; i++, slot++)
		srpt_set_ioc(ioui->controller_list, slot, 0);

	mad->mad_hdr.status = 0;
}

/*
 * Write IOControllerprofile to mad for I/O controller (sdev, slot). See also
 * section 16.3.3.4 IOControllerProfile in the InfiniBand Architecture
 * Specification. See also section B.7, table B.7 in the T10 SRP r16a
 * document.
 */
static void srpt_get_ioc(struct srpt_device *sdev, u32 slot,
			 struct ib_dm_mad *mad)
{
	struct ib_dm_ioc_profile *iocp;

	iocp = (struct ib_dm_ioc_profile *)mad->data;

	if (!slot || slot > 16) {
		mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
		return;
	}

	if (slot > 2) {
		mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
		return;
	}

	memset(iocp, 0, sizeof *iocp);
	strcpy(iocp->id_string, MELLANOX_SRPT_ID_STRING);
	iocp->guid = cpu_to_be64(srpt_service_guid);
	iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
	iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id);
	iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver);
	iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
	iocp->subsys_device_id = 0x0;
	iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
	iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
	iocp->protocol = cpu_to_be16(SRP_PROTOCOL);
	iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION);
	iocp->send_queue_depth = cpu_to_be16(SRPT_SRQ_SIZE);
	iocp->rdma_read_depth = 4;
	iocp->send_size = cpu_to_be32(srp_max_message_size);
	iocp->rdma_size = cpu_to_be32(min(max(srp_max_rdma_size, 256U),
					  1U << 24));
	iocp->num_svc_entries = 1;
	iocp->op_cap_mask = SRP_SEND_TO_IOC | SRP_SEND_FROM_IOC |
		SRP_RDMA_READ_FROM_IOC | SRP_RDMA_WRITE_FROM_IOC;

	mad->mad_hdr.status = 0;
}

/*
 * Device management: write ServiceEntries to mad for the given slot. See also
 * section 16.3.3.5 ServiceEntries in the InfiniBand Architecture
 * Specification. See also section B.7, table B.8 in the T10 SRP r16a document.
 */
static void srpt_get_svc_entries(u64 ioc_guid,
				 u16 slot, u8 hi, u8 lo, struct ib_dm_mad *mad)
{
	struct ib_dm_svc_entries *svc_entries;

	WARN_ON(!ioc_guid);

	if (!slot || slot > 16) {
		mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
		return;
	}

	if (slot > 2 || lo > hi || hi > 1) {
		mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
		return;
	}

	svc_entries = (struct ib_dm_svc_entries *)mad->data;
	memset(svc_entries, 0, sizeof *svc_entries);
	svc_entries->service_entries[0].id = cpu_to_be64(ioc_guid);
	snprintf(svc_entries->service_entries[0].name,
		 sizeof(svc_entries->service_entries[0].name),
		 "%s%016llx",
		 SRP_SERVICE_NAME_PREFIX,
		 (unsigned long long)ioc_guid);

	mad->mad_hdr.status = 0;
}

/*
 * Actual processing of a MAD *rq_mad received through source port *sp
 * (MAD = InfiniBand management datagram). The response to be sent back is
 * written to *rsp_mad.
 */
static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
				 struct ib_dm_mad *rsp_mad)
{
	u16 attr_id;
	u32 slot;
	u8 hi, lo;

	attr_id = be16_to_cpu(rq_mad->mad_hdr.attr_id);
	switch (attr_id) {
	case DM_ATTR_CLASS_PORT_INFO:
		srpt_get_class_port_info(rsp_mad);
		break;
	case DM_ATTR_IOU_INFO:
		srpt_get_iou(rsp_mad);
		break;
	case DM_ATTR_IOC_PROFILE:
		slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
		srpt_get_ioc(sp->sdev, slot, rsp_mad);
		break;
	case DM_ATTR_SVC_ENTRIES:
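		/*
		 * The 32-bit attribute modifier encodes the controller slot
		 * in its upper 16 bits, the index of the last requested
		 * service entry (hi) in bits 15:8 and the index of the first
		 * requested entry (lo) in bits 7:0.
		 */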
		slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
		hi = (u8) ((slot >> 8) & 0xff);
		lo = (u8) (slot & 0xff);
		slot = (u16) ((slot >> 16) & 0xffff);
		srpt_get_svc_entries(srpt_service_guid,
				     slot, hi, lo, rsp_mad);
		break;
	default:
		rsp_mad->mad_hdr.status =
		    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
		break;
	}
}

/*
 * Callback function that is called by the InfiniBand core after transmission
 * of a MAD. (MAD = management datagram; AH = address handle.)
 */
static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
				  struct ib_mad_send_wc *mad_wc)
{
	ib_destroy_ah(mad_wc->send_buf->ah);
	ib_free_send_mad(mad_wc->send_buf);
}

/*
 * Callback function that is called by the InfiniBand core after reception of
 * a MAD (management datagram).
 */
static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
				  struct ib_mad_recv_wc *mad_wc)
{
	struct srpt_port *sport = (struct srpt_port *)mad_agent->context;
	struct ib_ah *ah;
	struct ib_mad_send_buf *rsp;
	struct ib_dm_mad *dm_mad;

	if (!mad_wc || !mad_wc->recv_buf.mad)
		return;

	ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc,
				  mad_wc->recv_buf.grh, mad_agent->port_num);
	if (IS_ERR(ah))
		goto err;

	BUILD_BUG_ON(offsetof(struct ib_dm_mad, data) != IB_MGMT_DEVICE_HDR);

	rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp,
				 mad_wc->wc->pkey_index, 0,
				 IB_MGMT_DEVICE_HDR, IB_MGMT_DEVICE_DATA,
				 GFP_KERNEL);
	if (IS_ERR(rsp))
		goto err_rsp;

	rsp->ah = ah;

	dm_mad = rsp->mad;
	memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof *dm_mad);
	dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
	dm_mad->mad_hdr.status = 0;

	switch (mad_wc->recv_buf.mad->mad_hdr.method) {
	case IB_MGMT_METHOD_GET:
		srpt_mgmt_method_get(sport, mad_wc->recv_buf.mad, dm_mad);
		break;
	case IB_MGMT_METHOD_SET:
		dm_mad->mad_hdr.status =
		    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
		break;
	default:
		dm_mad->mad_hdr.status =
		    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD);
		break;
	}

	if (!ib_post_send_mad(rsp, NULL)) {
		ib_free_recv_mad(mad_wc);
		/* will destroy_ah & free_send_mad in send completion */
		return;
	}

	ib_free_send_mad(rsp);

err_rsp:
	ib_destroy_ah(ah);
err:
	ib_free_recv_mad(mad_wc);
}

/*
 * Enable InfiniBand management datagram processing, update the cached sm_lid,
 * lid and gid values, and register a callback function for processing MADs
 * on the specified port. It is safe to call this function more than once for
 * the same port.
 */
static int srpt_refresh_port(struct srpt_port *sport)
{
	struct ib_mad_reg_req reg_req;
	struct ib_port_modify port_modify;
	struct ib_port_attr port_attr;
	int ret;

	TRACE_ENTRY();

	memset(&port_modify, 0, sizeof port_modify);
	port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
	port_modify.clr_port_cap_mask = 0;

	ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
	if (ret)
		goto err_mod_port;

	ret = ib_query_port(sport->sdev->device, sport->port, &port_attr);
	if (ret)
		goto err_query_port;

	sport->sm_lid = port_attr.sm_lid;
	sport->lid = port_attr.lid;

	ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid);
	if (ret)
		goto err_query_port;

	if (!sport->mad_agent) {
		memset(&reg_req, 0, sizeof reg_req);
		reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
		reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION;
		set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
		set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);

		sport->mad_agent = ib_register_mad_agent(sport->sdev->device,
							 sport->port,
							 IB_QPT_GSI,
							 &reg_req, 0,
							 srpt_mad_send_handler,
							 srpt_mad_recv_handler,
							 sport);
		if (IS_ERR(sport->mad_agent)) {
			ret = PTR_ERR(sport->mad_agent);
			sport->mad_agent = NULL;
			goto err_query_port;
		}
	}

	TRACE_EXIT_RES(0);

	return 0;

err_query_port:

	port_modify.set_port_cap_mask = 0;
	port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
	ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);

err_mod_port:

	TRACE_EXIT_RES(ret);

	return ret;
}

/*
 * Unregister the callback function for processing MADs and disable MAD
 * processing for all ports of the specified device. It is safe to call this
 * function more than once for the same device.
 */
static void srpt_unregister_mad_agent(struct srpt_device *sdev)
{
	struct ib_port_modify port_modify = {
		.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP,
	};
	struct srpt_port *sport;
	int i;

	for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
		sport = &sdev->port[i - 1];
		WARN_ON(sport->port != i);
		if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0)
			PRINT_ERROR("%s", "disabling MAD processing failed.");
		if (sport->mad_agent) {
			ib_unregister_mad_agent(sport->mad_agent);
			sport->mad_agent = NULL;
		}
	}
}

/**
 * Allocate and initialize an SRPT I/O context structure.
 */
static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev)
{
	struct srpt_ioctx *ioctx;

	ioctx = kmalloc(sizeof *ioctx, GFP_KERNEL);
	if (!ioctx)
		goto out;

	ioctx->buf = kzalloc(srp_max_message_size, GFP_KERNEL);
	if (!ioctx->buf)
		goto out_free_ioctx;

	ioctx->dma = ib_dma_map_single(sdev->device, ioctx->buf,
				       srp_max_message_size, DMA_BIDIRECTIONAL);
	if (ib_dma_mapping_error(sdev->device, ioctx->dma))
		goto out_free_buf;

	return ioctx;

out_free_buf:
	kfree(ioctx->buf);
out_free_ioctx:
	kfree(ioctx);
out:
	return NULL;
}

/*
 * Deallocate an SRPT I/O context structure.
 */
static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
{
	if (!ioctx)
		return;

	ib_dma_unmap_single(sdev->device, ioctx->dma,
			    srp_max_message_size, DMA_BIDIRECTIONAL);
	kfree(ioctx->buf);
	kfree(ioctx);
}

/*
 * Associate a ring of SRPT I/O context structures with the specified device.
 */
static int srpt_alloc_ioctx_ring(struct srpt_device *sdev)
{
	int i;

	TRACE_ENTRY();

	for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
		sdev->ioctx_ring[i] = srpt_alloc_ioctx(sdev);

		if (!sdev->ioctx_ring[i])
			goto err;

		sdev->ioctx_ring[i]->index = i;
	}

	TRACE_EXIT_RES(0);

	return 0;

err:
	while (--i >= 0) {
		srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
		sdev->ioctx_ring[i] = NULL;
	}
	TRACE_EXIT_RES(-ENOMEM);
	return -ENOMEM;
}

/* Free the ring of SRPT I/O context structures. */
static void srpt_free_ioctx_ring(struct srpt_device *sdev)
{
	int i;

	for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
		srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
		sdev->ioctx_ring[i] = NULL;
	}
}

/**
 * Set the state of a command.
 * @new: New state to be set.
 *
 * Does not modify the state of aborted commands. Returns the previous command
 * state.
 */
static enum srpt_command_state srpt_set_cmd_state(struct srpt_ioctx *ioctx,
						  enum srpt_command_state new)
{
	enum srpt_command_state previous;

	WARN_ON(!ioctx);
	WARN_ON(new == SRPT_STATE_NEW);

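	/*
	 * Retry the compare-and-swap until it either succeeds or the command
	 * turns out to be in the ABORTED state, which must not be overwritten.
	 */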
	do {
		previous = atomic_read(&ioctx->state);
	} while (previous != SRPT_STATE_ABORTED
		 && atomic_cmpxchg(&ioctx->state, previous, new) != previous);

	return previous;
}

/**
 * Test and set the state of a command.
 * @old: State to compare against.
 * @new: New state to be set if the current state matches 'old'.
 *
 * Returns the previous command state.
 */
static enum srpt_command_state
srpt_test_and_set_cmd_state(struct srpt_ioctx *ioctx,
			    enum srpt_command_state old,
			    enum srpt_command_state new)
{
	WARN_ON(!ioctx);
	WARN_ON(old == SRPT_STATE_ABORTED);
	WARN_ON(new == SRPT_STATE_NEW);

	return atomic_cmpxchg(&ioctx->state, old, new);
}

/**
 * Post a receive request on the work queue of InfiniBand device 'sdev'.
 */
static int srpt_post_recv(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
{
	struct ib_sge list;
	struct ib_recv_wr wr, *bad_wr;

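	/*
	 * Tag the work request ID with SRPT_OP_RECV so that the completion
	 * handler can distinguish receive completions from send completions.
	 */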
	wr.wr_id = ioctx->index | SRPT_OP_RECV;

	list.addr = ioctx->dma;
	list.length = srp_max_message_size;
	list.lkey = sdev->mr->lkey;

	wr.next = NULL;
	wr.sg_list = &list;
	wr.num_sge = 1;

	return ib_post_srq_recv(sdev->srq, &wr, &bad_wr);
}

/**
 * Post an IB send request.
 * @ch: RDMA channel to post the send request on.
 * @ioctx: I/O context of the send request.
 * @len: length of the request to be sent in bytes.
 *
 * Returns zero upon success and a non-zero value upon failure.
 */
static int srpt_post_send(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
			  int len)
{
	struct ib_sge list;
	struct ib_send_wr wr, *bad_wr;
	struct srpt_device *sdev = ch->sport->sdev;
	int ret;

	ret = -ENOMEM;
	if (atomic_dec_return(&ch->qp_wr_avail) < 0) {
		atomic_inc(&ch->qp_wr_avail);
		PRINT_ERROR("%s[%d]: SRQ full", __func__, __LINE__);
		goto out;
	}

	ib_dma_sync_single_for_device(sdev->device, ioctx->dma,
				      len, DMA_TO_DEVICE);

	list.addr = ioctx->dma;
	list.length = len;
	list.lkey = sdev->mr->lkey;

	wr.next = NULL;
	wr.wr_id = ioctx->index;
	wr.sg_list = &list;
	wr.num_sge = 1;
	wr.opcode = IB_WR_SEND;
	wr.send_flags = IB_SEND_SIGNALED;

	ret = ib_post_send(ch->qp, &wr, &bad_wr);

out:
	return ret;
}

static int srpt_get_desc_tbl(struct srpt_ioctx *ioctx, struct srp_cmd *srp_cmd,
			     int *ind)
{
	struct srp_indirect_buf *idb;
	struct srp_direct_buf *db;

	*ind = 0;
	if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) ||
	    ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) {
		ioctx->n_rbuf = 1;
		ioctx->rbufs = &ioctx->single_rbuf;

		ib_dma_sync_single_for_cpu(ioctx->ch->sport->sdev->device,
					   ioctx->dma + sizeof(struct srp_cmd),
					   sizeof(*db), DMA_FROM_DEVICE);

		db = (void *)srp_cmd->add_data;
		memcpy(ioctx->rbufs, db, sizeof *db);
		ioctx->data_len = be32_to_cpu(db->len);
	} else {
		idb = (void *)srp_cmd->add_data;

		ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof *db;

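		/*
		 * Reject descriptor tables with more entries than are
		 * embedded in the SRP_CMD itself; the caller then responds
		 * with TASK SET FULL (see srpt_handle_cmd()).
		 */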
		if (ioctx->n_rbuf >
		    (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
			*ind = 1;
			ioctx->n_rbuf = 0;
			goto out;
		}

		if (ioctx->n_rbuf == 1)
			ioctx->rbufs = &ioctx->single_rbuf;
		else
			ioctx->rbufs =
				kmalloc(ioctx->n_rbuf * sizeof *db, GFP_ATOMIC);
		if (!ioctx->rbufs) {
			ioctx->n_rbuf = 0;
			return -ENOMEM;
		}

		ib_dma_sync_single_for_cpu(ioctx->ch->sport->sdev->device,
					   ioctx->dma + sizeof(struct srp_cmd),
					   ioctx->n_rbuf * sizeof(*db),
					   DMA_FROM_DEVICE);

		db = idb->desc_list;
		memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof *db);
		ioctx->data_len = be32_to_cpu(idb->len);
	}
out:
	return 0;
}

/*
 * Modify the attributes of queue pair 'qp': allow local write, remote read,
 * and remote write. Also transition 'qp' to state IB_QPS_INIT.
 */
static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
{
	struct ib_qp_attr *attr;
	int ret;

	attr = kzalloc(sizeof *attr, GFP_KERNEL);
	if (!attr)
		return -ENOMEM;

	attr->qp_state = IB_QPS_INIT;
	attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
	    IB_ACCESS_REMOTE_WRITE;
	attr->port_num = ch->sport->port;
	attr->pkey_index = 0;

	ret = ib_modify_qp(qp, attr,
			   IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PORT |
			   IB_QP_PKEY_INDEX);

	kfree(attr);
	return ret;
}

/**
 * Change the state of a channel to 'ready to receive' (RTR).
 * @ch: channel of the queue pair.
 * @qp: queue pair to change the state of.
 *
 * Returns zero upon success and a negative value upon failure.
 *
 * Note: currently a struct ib_qp_attr takes 136 bytes on a 64-bit system.
 * If this structure ever becomes larger, it might be necessary to allocate
 * it dynamically instead of on the stack.
 */
static int srpt_ch_qp_rtr(struct srpt_rdma_ch *ch, struct ib_qp *qp)
{
	struct ib_qp_attr qp_attr;
	int attr_mask;
	int ret;

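	/*
	 * Let the IB CM compute the remaining attributes and the attribute
	 * mask for the RTR transition.
	 */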
	qp_attr.qp_state = IB_QPS_RTR;
	ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask);
	if (ret)
		goto out;

	qp_attr.max_dest_rd_atomic = 4;

	ret = ib_modify_qp(qp, &qp_attr, attr_mask);

out:
	return ret;
}

/**
 * Change the state of a channel to 'ready to send' (RTS).
 * @ch: channel of the queue pair.
 * @qp: queue pair to change the state of.
 *
 * Returns zero upon success and a negative value upon failure.
 *
 * Note: currently a struct ib_qp_attr takes 136 bytes on a 64-bit system.
 * If this structure ever becomes larger, it might be necessary to allocate
 * it dynamically instead of on the stack.
 */
static int srpt_ch_qp_rts(struct srpt_rdma_ch *ch, struct ib_qp *qp)
{
	struct ib_qp_attr qp_attr;
	int attr_mask;
	int ret;

	qp_attr.qp_state = IB_QPS_RTS;
	ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask);
	if (ret)
		goto out;

	qp_attr.max_rd_atomic = 4;

	ret = ib_modify_qp(qp, &qp_attr, attr_mask);

out:
	return ret;
}

static void srpt_reset_ioctx(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
{
	int i;

	if (ioctx->n_rdma_ius > 0 && ioctx->rdma_ius) {
		struct rdma_iu *riu = ioctx->rdma_ius;

		for (i = 0; i < ioctx->n_rdma_ius; ++i, ++riu)
			kfree(riu->sge);
		kfree(ioctx->rdma_ius);
	}

	if (ioctx->n_rbuf > 1)
		kfree(ioctx->rbufs);

	/* If ch == NULL this means that the command has been aborted. */
	if (!ch)
		return;

	if (srpt_post_recv(ch->sport->sdev, ioctx))
		/* We should queue it back to the free_ioctx queue. */
		PRINT_ERROR("%s", "SRQ post_recv failed - this is serious.");
	else
		atomic_inc(&ch->req_lim_delta);
}

/**
 * Abort a command.
 */
static void srpt_abort_scst_cmd(struct srpt_device *sdev,
				struct scst_cmd *scmnd)
{
	struct srpt_ioctx *ioctx;
	scst_data_direction dir;
	enum srpt_command_state previous_state;

	TRACE_ENTRY();

	ioctx = scst_cmd_get_tgt_priv(scmnd);
	BUG_ON(!ioctx);

	previous_state = srpt_set_cmd_state(ioctx, SRPT_STATE_ABORTED);
	if (previous_state == SRPT_STATE_ABORTED)
		goto out;

	TRACE_DBG("Aborting cmd with state %d and tag %lld",
		  previous_state, scst_cmd_get_tag(scmnd));

	dir = scst_cmd_get_data_direction(scmnd);
	if (dir != SCST_DATA_NONE && scst_cmd_get_sg(scmnd))
		ib_dma_unmap_sg(sdev->device,
				scst_cmd_get_sg(scmnd),
				scst_cmd_get_sg_cnt(scmnd),
				scst_to_tgt_dma_dir(dir));

	switch (previous_state) {
	case SRPT_STATE_NEW:
		break;
	case SRPT_STATE_NEED_DATA:
		WARN_ON(scst_cmd_get_data_direction(ioctx->scmnd)
			== SCST_DATA_READ);
		scst_rx_data(scmnd,
			     SCST_RX_STATUS_ERROR,
			     SCST_CONTEXT_THREAD);
		break;
	case SRPT_STATE_DATA_IN:
	case SRPT_STATE_PROCESSED:
		scst_set_delivery_status(scmnd, SCST_CMD_DELIVERY_ABORTED);
		WARN_ON(scmnd->state != SCST_CMD_STATE_XMIT_WAIT);
		scst_tgt_cmd_done(scmnd, scst_estimate_context());
		break;
	default:
		TRACE_DBG("Aborting cmd with state %d", previous_state);
		WARN_ON("ERROR: unexpected command state");
	}

out:
	TRACE_EXIT();
}

static void srpt_handle_err_comp(struct srpt_rdma_ch *ch, struct ib_wc *wc)
{
	struct srpt_ioctx *ioctx;
	struct srpt_device *sdev = ch->sport->sdev;

	if (wc->wr_id & SRPT_OP_RECV) {
		ioctx = sdev->ioctx_ring[wc->wr_id & ~SRPT_OP_RECV];
		PRINT_ERROR("%s", "This is serious - SRQ is in bad state.");
	} else {
		ioctx = sdev->ioctx_ring[wc->wr_id];

		if (ioctx->scmnd)
			srpt_abort_scst_cmd(sdev, ioctx->scmnd);
		else
			srpt_reset_ioctx(ch, ioctx);
	}
}

/** Process an IB send completion notification. */
static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
				  struct srpt_ioctx *ioctx,
				  enum scst_exec_context context)
{
	if (ioctx->scmnd) {
		scst_data_direction dir =
			scst_cmd_get_data_direction(ioctx->scmnd);

		if (dir != SCST_DATA_NONE && scst_cmd_get_sg(ioctx->scmnd))
			ib_dma_unmap_sg(ch->sport->sdev->device,
					scst_cmd_get_sg(ioctx->scmnd),
					scst_cmd_get_sg_cnt(ioctx->scmnd),
					scst_to_tgt_dma_dir(dir));

		WARN_ON(ioctx->scmnd->state != SCST_CMD_STATE_XMIT_WAIT);
		scst_tgt_cmd_done(ioctx->scmnd, context);
	} else
		srpt_reset_ioctx(ch, ioctx);
}

/** Process an IB RDMA completion notification. */
static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
				  struct srpt_ioctx *ioctx)
{
	if (!ioctx->scmnd) {
		WARN_ON("ERROR: ioctx->scmnd == NULL");
		srpt_reset_ioctx(ch, ioctx);
		return;
	}

	/*
	 * If an RDMA completion notification has been received for a write
	 * command, tell SCST that processing can continue by calling
	 * scst_rx_data().
	 */
	if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
				SRPT_STATE_DATA_IN) == SRPT_STATE_NEED_DATA) {
		WARN_ON(scst_cmd_get_data_direction(ioctx->scmnd)
			== SCST_DATA_READ);
		scst_rx_data(ioctx->scmnd, SCST_RX_STATUS_SUCCESS,
			     scst_estimate_context());
	}
}

/**
 * Build an SRP_RSP response.
 * @ch: RDMA channel through which the request has been received.
 * @ioctx: I/O context in which the SRP_RSP response will be built.
 * @s_key: sense key that will be stored in the response.
 * @s_code: value that will be stored in the asc_ascq field of the sense data.
 * @tag: tag of the request for which this response is being generated.
 *
 * Returns the size in bytes of the SRP_RSP response.
 *
 * An SRP_RSP response contains a SCSI status or service response. See also
 * section 6.9 in the T10 SRP r16a document for the format of an SRP_RSP
 * response. See also SPC-2 for more information about sense data.
 */
static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
			      struct srpt_ioctx *ioctx, u8 s_key, u8 s_code,
			      u64 tag)
{
	struct srp_rsp *srp_rsp;
	struct sense_data *sense;
	int limit_delta;
	int sense_data_len;
	int resp_len;

	sense_data_len = (s_key == NO_SENSE) ? 0 : sizeof(*sense);
	resp_len = sizeof(*srp_rsp) + sense_data_len;

	srp_rsp = ioctx->buf;
	memset(srp_rsp, 0, sizeof *srp_rsp);

	limit_delta = atomic_read(&ch->req_lim_delta);
	atomic_sub(limit_delta, &ch->req_lim_delta);

	srp_rsp->opcode = SRP_RSP;
	srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
	srp_rsp->tag = tag;

	if (s_key != NO_SENSE) {
		srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
		srp_rsp->status = SAM_STAT_CHECK_CONDITION;
		srp_rsp->sense_data_len = cpu_to_be32(sense_data_len);

		sense = (struct sense_data *)(srp_rsp + 1);
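		/* 0x70: fixed format sense data, current error (SPC-2). */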
		sense->err_code = 0x70;
		sense->key = s_key;
		sense->asc_ascq = s_code;
	}

	return resp_len;
}

/**
 * Build a task management response, which is a specific SRP_RSP response.
 * @ch: RDMA channel through which the request has been received.
 * @ioctx: I/O context in which the SRP_RSP response will be built.
 * @rsp_code: RSP_CODE that will be stored in the response.
 * @tag: tag of the request for which this response is being generated.
 *
 * Returns the size in bytes of the SRP_RSP response.
 *
 * An SRP_RSP response contains a SCSI status or service response. See also
 * section 6.9 in the T10 SRP r16a document for the format of an SRP_RSP
 * response.
 */
static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
				  struct srpt_ioctx *ioctx, u8 rsp_code,
				  u64 tag)
{
	struct srp_rsp *srp_rsp;
	int limit_delta;
	int resp_data_len;
	int resp_len;

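	/*
	 * A failed task management function carries four bytes of response
	 * data holding the RSP_CODE (see also section 6.9 in SRP r16a).
	 */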
	resp_data_len = (rsp_code == SRP_TSK_MGMT_SUCCESS) ? 0 : 4;
	resp_len = sizeof(*srp_rsp) + resp_data_len;

	srp_rsp = ioctx->buf;
	memset(srp_rsp, 0, sizeof *srp_rsp);

	limit_delta = atomic_read(&ch->req_lim_delta);
	atomic_sub(limit_delta, &ch->req_lim_delta);

	srp_rsp->opcode = SRP_RSP;
	srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
	srp_rsp->tag = tag;

	if (rsp_code != SRP_TSK_MGMT_SUCCESS) {
		srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID;
		srp_rsp->resp_data_len = cpu_to_be32(resp_data_len);
		srp_rsp->data[3] = rsp_code;
	}

	return resp_len;
}

/*
 * Process SRP_CMD.
 */
static int srpt_handle_cmd(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
{
	struct scst_cmd *scmnd;
	struct srp_cmd *srp_cmd;
	struct srp_rsp *srp_rsp;
	scst_data_direction dir;
	int indirect_desc = 0;
	int ret;

	srp_cmd = ioctx->buf;
	srp_rsp = ioctx->buf;

	dir = SCST_DATA_NONE;
	if (srp_cmd->buf_fmt) {
		ret = srpt_get_desc_tbl(ioctx, srp_cmd, &indirect_desc);
		if (ret) {
			srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
					   NO_ADD_SENSE, srp_cmd->tag);
			srp_rsp->status = SAM_STAT_TASK_SET_FULL;
			goto err;
		}

		if (indirect_desc) {
			srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
					   NO_ADD_SENSE, srp_cmd->tag);
			srp_rsp->status = SAM_STAT_TASK_SET_FULL;
			goto err;
		}

		/*
		 * The lower four bits of the buffer format field contain the
		 * DATA-IN buffer descriptor format, and the highest four bits
		 * contain the DATA-OUT buffer descriptor format.
		 */
		if (srp_cmd->buf_fmt & 0xf)
			/* DATA-IN: transfer data from target to initiator. */
			dir = SCST_DATA_READ;
		else if (srp_cmd->buf_fmt >> 4)
			/* DATA-OUT: transfer data from initiator to target. */
			dir = SCST_DATA_WRITE;
	}

	scmnd = scst_rx_cmd(ch->scst_sess, (u8 *) &srp_cmd->lun,
			    sizeof srp_cmd->lun, srp_cmd->cdb, 16,
			    thread ? SCST_NON_ATOMIC : SCST_ATOMIC);
	if (!scmnd) {
		srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
				   NO_ADD_SENSE, srp_cmd->tag);
		srp_rsp->status = SAM_STAT_TASK_SET_FULL;
		goto err;
	}

	ioctx->scmnd = scmnd;

	switch (srp_cmd->task_attr) {
	case SRP_CMD_HEAD_OF_Q:
		scmnd->queue_type = SCST_CMD_QUEUE_HEAD_OF_QUEUE;
		break;
	case SRP_CMD_ORDERED_Q:
		scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
		break;
	case SRP_CMD_SIMPLE_Q:
		scmnd->queue_type = SCST_CMD_QUEUE_SIMPLE;
		break;
	case SRP_CMD_ACA:
		scmnd->queue_type = SCST_CMD_QUEUE_ACA;
		break;
	default:
		scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
		break;
	}

	scst_cmd_set_tag(scmnd, srp_cmd->tag);
	scst_cmd_set_tgt_priv(scmnd, ioctx);
	scst_cmd_set_expected(scmnd, dir, ioctx->data_len);
	scst_cmd_init_done(scmnd, scst_estimate_context());

	return 0;

err:
	WARN_ON(srp_rsp->opcode != SRP_RSP);

	return -1;
}

/*
 * Process an SRP_TSK_MGMT request.
 *
 * Returns 0 upon success and -1 upon failure.
 *
 * Each task management function is performed by calling one of the
 * scst_rx_mgmt_fn*() functions. These functions will either report failure
 * or process the task management function asynchronously. The function
 * srpt_tsk_mgmt_done() will be called by the SCST core upon completion of the
 * task management function. When srpt_handle_tsk_mgmt() reports failure
 * (i.e. returns -1) a response will have been built in ioctx->buf. This
 * information unit has to be sent back by the caller.
 *
 * For more information about SRP_TSK_MGMT information units, see also section
 * 6.7 in the T10 SRP r16a document.
 */
static int srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
				struct srpt_ioctx *ioctx)
{
	struct srp_tsk_mgmt *srp_tsk;
	struct srpt_mgmt_ioctx *mgmt_ioctx;
	int ret;

	ib_dma_sync_single_for_cpu(ch->sport->sdev->device, ioctx->dma,
				   sizeof(struct srp_tsk_mgmt),
				   DMA_FROM_DEVICE);

	srp_tsk = ioctx->buf;

	TRACE_DBG("recv_tsk_mgmt= %d for task_tag= %lld"
		  " using tag= %lld cm_id= %p sess= %p",
		  srp_tsk->tsk_mgmt_func,
		  (unsigned long long) srp_tsk->task_tag,
		  (unsigned long long) srp_tsk->tag,
		  ch->cm_id, ch->scst_sess);

	mgmt_ioctx = kmalloc(sizeof *mgmt_ioctx, GFP_ATOMIC);
	if (!mgmt_ioctx) {
		srpt_build_tskmgmt_rsp(ch, ioctx, SRP_TSK_MGMT_FAILED,
				       srp_tsk->tag);
		goto err;
	}

	mgmt_ioctx->ioctx = ioctx;
	mgmt_ioctx->ch = ch;
	mgmt_ioctx->tag = srp_tsk->tag;

	switch (srp_tsk->tsk_mgmt_func) {
	case SRP_TSK_ABORT_TASK:
		TRACE_DBG("%s", "Processing SRP_TSK_ABORT_TASK");
		ret = scst_rx_mgmt_fn_tag(ch->scst_sess,
					  SCST_ABORT_TASK,
					  srp_tsk->task_tag,
					  thread ?
					  SCST_NON_ATOMIC : SCST_ATOMIC,
					  mgmt_ioctx);
		break;
	case SRP_TSK_ABORT_TASK_SET:
		TRACE_DBG("%s", "Processing SRP_TSK_ABORT_TASK_SET");
		ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
					  SCST_ABORT_TASK_SET,
					  (u8 *) &srp_tsk->lun,
					  sizeof srp_tsk->lun,
					  thread ?
					  SCST_NON_ATOMIC : SCST_ATOMIC,
					  mgmt_ioctx);
		break;
	case SRP_TSK_CLEAR_TASK_SET:
		TRACE_DBG("%s", "Processing SRP_TSK_CLEAR_TASK_SET");
		ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
					  SCST_CLEAR_TASK_SET,
					  (u8 *) &srp_tsk->lun,
					  sizeof srp_tsk->lun,
					  thread ?
					  SCST_NON_ATOMIC : SCST_ATOMIC,
					  mgmt_ioctx);
		break;
	case SRP_TSK_LUN_RESET:
		TRACE_DBG("%s", "Processing SRP_TSK_LUN_RESET");
		ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
					  SCST_LUN_RESET,
					  (u8 *) &srp_tsk->lun,
					  sizeof srp_tsk->lun,
					  thread ?
					  SCST_NON_ATOMIC : SCST_ATOMIC,
					  mgmt_ioctx);
		break;
	case SRP_TSK_CLEAR_ACA:
		TRACE_DBG("%s", "Processing SRP_TSK_CLEAR_ACA");
		ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
					  SCST_CLEAR_ACA,
					  (u8 *) &srp_tsk->lun,
					  sizeof srp_tsk->lun,
					  thread ?
					  SCST_NON_ATOMIC : SCST_ATOMIC,
					  mgmt_ioctx);
		break;
	default:
		TRACE_DBG("%s", "Unsupported task management function.");
		srpt_build_tskmgmt_rsp(ch, ioctx,
				       SRP_TSK_MGMT_FUNC_NOT_SUPP,
				       srp_tsk->tag);
		goto err;
	}

	if (ret) {
		TRACE_DBG("%s", "Processing task management function failed.");
		srpt_build_tskmgmt_rsp(ch, ioctx, SRP_TSK_MGMT_FAILED,
				       srp_tsk->tag);
		goto err;
	}

	WARN_ON(srp_tsk->opcode == SRP_RSP);

	return 0;

err:
	WARN_ON(srp_tsk->opcode != SRP_RSP);

	kfree(mgmt_ioctx);
	return -1;
}

/**
 * Process a newly received information unit.
 * @ch: RDMA channel through which the information unit has been received.
 * @ioctx: SRPT I/O context associated with the information unit.
 */
static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
			       struct srpt_ioctx *ioctx)
{
	struct srp_cmd *srp_cmd;
	struct srp_rsp *srp_rsp;
	unsigned long flags;
	enum rdma_ch_state ch_state;
	int len;

	ch_state = atomic_read(&ch->state);
	if (ch_state != RDMA_CHANNEL_LIVE) {
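		/*
		 * Recheck the channel state under the lock: information units
		 * that arrive while the channel is still connecting are
		 * queued until the channel becomes live.
		 */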
		spin_lock_irqsave(&ch->spinlock, flags);
		ch_state = atomic_read(&ch->state);
		if (ch_state == RDMA_CHANNEL_CONNECTING) {
			list_add_tail(&ioctx->wait_list, &ch->cmd_wait_list);
			spin_unlock_irqrestore(&ch->spinlock, flags);
			return;
		} else if (ch_state == RDMA_CHANNEL_DISCONNECTING) {
			spin_unlock_irqrestore(&ch->spinlock, flags);
			srpt_reset_ioctx(ch, ioctx);
			return;
		}
		spin_unlock_irqrestore(&ch->spinlock, flags);
	}

	WARN_ON(ch_state != RDMA_CHANNEL_LIVE);

	ib_dma_sync_single_for_cpu(ch->sport->sdev->device, ioctx->dma,
				   sizeof(struct srp_cmd), DMA_FROM_DEVICE);

	ioctx->data_len = 0;
	ioctx->n_rbuf = 0;
	ioctx->rbufs = NULL;
	ioctx->n_rdma = 0;
	ioctx->n_rdma_ius = 0;
	ioctx->rdma_ius = NULL;
	ioctx->scmnd = NULL;
	ioctx->ch = ch;
	atomic_set(&ioctx->state, SRPT_STATE_NEW);

	srp_cmd = ioctx->buf;
	srp_rsp = ioctx->buf;

	switch (srp_cmd->opcode) {
	case SRP_CMD:
		if (srpt_handle_cmd(ch, ioctx) < 0)
			goto err;
		break;

	case SRP_TSK_MGMT:
		if (srpt_handle_tsk_mgmt(ch, ioctx) < 0)
			goto err;
		break;

	case SRP_I_LOGOUT:
	case SRP_AER_REQ:
	default:
		srpt_build_cmd_rsp(ch, ioctx, ILLEGAL_REQUEST, INVALID_CDB,
				   srp_cmd->tag);
		goto err;
	}

	return;

err:
	WARN_ON(srp_rsp->opcode != SRP_RSP);
	len = (sizeof *srp_rsp) + be32_to_cpu(srp_rsp->sense_data_len);

	ch_state = atomic_read(&ch->state);
	if (ch_state != RDMA_CHANNEL_LIVE) {
		/* Give up if another thread modified the channel state. */
		PRINT_ERROR("%s: channel is in state %d", __func__, ch_state);
		srpt_reset_ioctx(ch, ioctx);
	} else if (srpt_post_send(ch, ioctx, len)) {
		PRINT_ERROR("%s: sending SRP_RSP response failed", __func__);
		srpt_reset_ioctx(ch, ioctx);
	}
}

/*
 * Returns true if the ioctx list is non-empty or if the ib_srpt kernel thread
 * should stop.
 * @pre thread != 0
 */
static inline int srpt_test_ioctx_list(void)
{
	int res = (!list_empty(&srpt_thread.thread_ioctx_list) ||
		   unlikely(kthread_should_stop()));
	return res;
}

/*
 * Add 'ioctx' to the tail of the ioctx list and wake up the kernel thread.
 *
 * @pre thread != 0
 */
static inline void srpt_schedule_thread(struct srpt_ioctx *ioctx)
{
	unsigned long flags;

	spin_lock_irqsave(&srpt_thread.thread_lock, flags);
	list_add_tail(&ioctx->comp_list, &srpt_thread.thread_ioctx_list);
	spin_unlock_irqrestore(&srpt_thread.thread_lock, flags);
	wake_up(&ioctx_list_waitQ);
}

/**
 * InfiniBand completion queue callback function.
 * @cq: completion queue.
 * @ctx: completion queue context, which was passed as the fourth argument of
 *       the function ib_create_cq().
 */
static void srpt_completion(struct ib_cq *cq, void *ctx)
{
	struct srpt_rdma_ch *ch = ctx;
	struct srpt_device *sdev = ch->sport->sdev;
	struct ib_wc wc;
	struct srpt_ioctx *ioctx;

1526         ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
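        /*
         * The CQ is re-armed before it is drained: a completion that arrives
         * while the polling loop below runs will trigger a new callback
         * invocation. This can cause a spurious extra call to
         * srpt_completion() but never a lost completion event.
         */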
1527         while (ib_poll_cq(ch->cq, 1, &wc) > 0) {
1528                 if (wc.status) {
1529                         PRINT_ERROR("failed %s status= %d",
1530                                wc.wr_id & SRPT_OP_RECV ? "receive" : "send",
1531                                wc.status);
1532                         srpt_handle_err_comp(ch, &wc);
1533                         break;
1534                 }
1535
1536                 if (wc.wr_id & SRPT_OP_RECV) {
1537                         ioctx = sdev->ioctx_ring[wc.wr_id & ~SRPT_OP_RECV];
1538                         if (thread) {
1539                                 ioctx->ch = ch;
1540                                 ioctx->op = IB_WC_RECV;
1541                                 srpt_schedule_thread(ioctx);
1542                         } else
1543                                 srpt_handle_new_iu(ch, ioctx);
1544                         continue;
1545                 } else {
1546                         ioctx = sdev->ioctx_ring[wc.wr_id];
1547                         if (wc.opcode == IB_WC_SEND)
1548                                 atomic_inc(&ch->qp_wr_avail);
1549                         else {
1550                                 WARN_ON(wc.opcode != IB_WC_RDMA_READ);
1551                                 WARN_ON(ioctx->n_rdma <= 0);
1552                                 atomic_add(ioctx->n_rdma,
1553                                            &ch->qp_wr_avail);
1554                         }
1555                 }
1556
1557                 if (thread) {
1558                         ioctx->ch = ch;
1559                         ioctx->op = wc.opcode;
1560                         srpt_schedule_thread(ioctx);
1561                 } else {
1562                         switch (wc.opcode) {
1563                         case IB_WC_SEND:
1564                                 srpt_handle_send_comp(ch, ioctx,
1565                                         scst_estimate_context());
1566                                 break;
1567                         case IB_WC_RDMA_WRITE:
1568                         case IB_WC_RDMA_READ:
1569                                 srpt_handle_rdma_comp(ch, ioctx);
1570                                 break;
1571                         default:
1572                                 break;
1573                         }
1574                 }
1575
1576 #if defined(CONFIG_SCST_DEBUG)
1577                 if (interrupt_processing_delay_in_us <= MAX_UDELAY_MS * 1000)
1578                         udelay(interrupt_processing_delay_in_us);
1579 #endif
1580         }
1581 }
1582
1583 /*
1584  * Create a completion queue on the specified device.
1585  */
1586 static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
1587 {
1588         struct ib_qp_init_attr *qp_init;
1589         struct srpt_device *sdev = ch->sport->sdev;
1590         int cqe;
1591         int ret;
1592
1593         qp_init = kzalloc(sizeof *qp_init, GFP_KERNEL);
1594         if (!qp_init)
1595                 return -ENOMEM;
1596
1597         /* Create a completion queue (CQ). */
1598
1599         cqe = SRPT_RQ_SIZE + SRPT_SQ_SIZE - 1;
1600 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(RHEL_RELEASE_CODE)
1601         ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe);
1602 #else
1603         ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe, 0);
1604 #endif
1605         if (IS_ERR(ch->cq)) {
1606                 ret = PTR_ERR(ch->cq);
1607                 PRINT_ERROR("failed to create_cq cqe= %d ret= %d", cqe, ret);
1608                 goto out;
1609         }
1610
1611         /* Request completion notification. */
1612
1613         ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
1614
1615         /* Create a queue pair (QP). */
1616
1617         qp_init->qp_context = (void *)ch;
1618         qp_init->event_handler
1619                 = (void(*)(struct ib_event *, void*))srpt_qp_event;
1620         qp_init->send_cq = ch->cq;
1621         qp_init->recv_cq = ch->cq;
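        /*
         * All RDMA channels of one HCA share that HCA's shared receive
         * queue (sdev->srq), so receive buffers are pooled across
         * connections instead of being allocated per QP.
         */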
1622         qp_init->srq = sdev->srq;
1623         qp_init->sq_sig_type = IB_SIGNAL_REQ_WR;
1624         qp_init->qp_type = IB_QPT_RC;
1625         qp_init->cap.max_send_wr = SRPT_SQ_SIZE;
1626         qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE;
1627
1628         ch->qp = ib_create_qp(sdev->pd, qp_init);
1629         if (IS_ERR(ch->qp)) {
1630                 ret = PTR_ERR(ch->qp);
1631                 ib_destroy_cq(ch->cq);
1632                 PRINT_ERROR("failed to create_qp ret= %d", ret);
1633                 goto out;
1634         }
1635
1636         atomic_set(&ch->qp_wr_avail, qp_init->cap.max_send_wr);
1637
1638         TRACE_DBG("%s: max_cqe= %d max_sge= %d cm_id= %p",
1639                __func__, ch->cq->cqe, qp_init->cap.max_send_sge,
1640                ch->cm_id);
1641
1642         /* Modify the attributes and the state of queue pair ch->qp. */
1643
1644         ret = srpt_init_ch_qp(ch, ch->qp);
1645         if (ret) {
1646                 ib_destroy_qp(ch->qp);
1647                 ib_destroy_cq(ch->cq);
1648                 goto out;
1649         }
1650
1651         atomic_set(&ch->req_lim_delta, SRPT_RQ_SIZE);
1652 out:
1653         kfree(qp_init);
1654         return ret;
1655 }
1656
1657 /**
1658  * Look up the RDMA channel that corresponds to the specified cm_id.
1659  *
1660  * Return NULL if no matching RDMA channel has been found.
1661  *
1662  * Notes:
1663  * - Must be called from inside srpt_cm_handler to avoid a race between
1664  *   accessing sdev->spinlock and the call to kfree(sdev) in srpt_remove_one()
1665  *   (the caller of srpt_cm_handler holds the cm_id spinlock;
1666  *   srpt_remove_one() waits until all SCST sessions for the associated
1667  *   IB device have been unregistered and SCST session unregistration involves
1668  *   a call to ib_destroy_cm_id(), which locks the cm_id spinlock and hence
1669  *   waits until this function has finished).
1670  * - When release_ch == true the return value may be compared with NULL
1671  *   but must not be dereferenced because in this case the return value is
1672  *   a dangling pointer.
1673  */
1674 static struct srpt_rdma_ch *srpt_find_channel(struct ib_cm_id *cm_id,
1675                                               bool release_ch)
1676 {
1677         struct srpt_device *sdev;
1678         struct srpt_rdma_ch *ch;
1679
1680         sdev = cm_id->context;
1681         BUG_ON(!sdev);
1682         ch = NULL;
1683         spin_lock_irq(&sdev->spinlock);
1684         list_for_each_entry(ch, &sdev->rch_list, list) {
1685                 if (ch->cm_id == cm_id) {
1686                         if (release_ch) {
1687                                 list_del(&ch->list);
1688                                 scst_unregister_session(ch->scst_sess, 0,
1689                                                         srpt_release_channel);
1690                         }
1691                         break;
1692                 }
1693         }
1694
1695         spin_unlock_irq(&sdev->spinlock);
1696
1697         return ch;
1698 }
1699
1700 /**
1701  * Release all resources associated with an RDMA channel.
1702  *
1703  * Notes:
1704  * - The caller must have removed the channel from the channel list before
1705  *   calling this function.
1706  * - Must be called as a callback function via scst_unregister_session(). Never
1707  *   call this function directly because doing so would trigger several race
1708  *   conditions.
1709  */
1710 static void srpt_release_channel(struct scst_session *scst_sess)
1711 {
1712         struct srpt_rdma_ch *ch;
1713
1714         TRACE_ENTRY();
1715
1716         ch = scst_sess_get_tgt_priv(scst_sess);
1717         BUG_ON(!ch);
1718         WARN_ON(srpt_find_channel(ch->cm_id, false) == ch);
1719
1720         TRACE_DBG("destroying cm_id %p", ch->cm_id);
1721         BUG_ON(!ch->cm_id);
1722         ib_destroy_cm_id(ch->cm_id);
1723
1724         ib_destroy_qp(ch->qp);
1725         ib_destroy_cq(ch->cq);
1726         kfree(ch);
1727
1728         TRACE_EXIT();
1729 }
1730
1731 /**
1732  * Process the event IB_CM_REQ_RECEIVED.
1733  *
1734  * Ownership of the cm_id is transferred to the SCST session if this function
1735  * returns zero. Otherwise the caller remains the owner of the cm_id.
1736  */
1737 static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
1738                             struct ib_cm_req_event_param *param,
1739                             void *private_data)
1740 {
1741         struct srpt_device *sdev = cm_id->context;
1742         struct srp_login_req *req;
1743         struct srp_login_rsp *rsp;
1744         struct srp_login_rej *rej;
1745         struct ib_cm_rep_param *rep_param;
1746         struct srpt_rdma_ch *ch, *tmp_ch;
1747         u32 it_iu_len;
1748         int ret = 0;
1749
1750 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
1751         WARN_ON(!sdev || !private_data);
1752         if (!sdev || !private_data)
1753                 return -EINVAL;
1754 #else
1755         if (WARN_ON(!sdev || !private_data))
1756                 return -EINVAL;
1757 #endif
1758
1759         req = (struct srp_login_req *)private_data;
1760
1761         it_iu_len = be32_to_cpu(req->req_it_iu_len);
1762
1763         PRINT_INFO("Received SRP_LOGIN_REQ with"
1764             " i_port_id 0x%llx:0x%llx, t_port_id 0x%llx:0x%llx and length %d"
1765             " on port %d (guid=0x%llx:0x%llx)",
1766             (unsigned long long)be64_to_cpu(*(u64 *)&req->initiator_port_id[0]),
1767             (unsigned long long)be64_to_cpu(*(u64 *)&req->initiator_port_id[8]),
1768             (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[0]),
1769             (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[8]),
1770             it_iu_len,
1771             param->port,
1772             (unsigned long long)be64_to_cpu(*(u64 *)
1773                                 &sdev->port[param->port - 1].gid.raw[0]),
1774             (unsigned long long)be64_to_cpu(*(u64 *)
1775                                 &sdev->port[param->port - 1].gid.raw[8]));
1776
1777         rsp = kzalloc(sizeof *rsp, GFP_KERNEL);
1778         rej = kzalloc(sizeof *rej, GFP_KERNEL);
1779         rep_param = kzalloc(sizeof *rep_param, GFP_KERNEL);
1780
1781         if (!rsp || !rej || !rep_param) {
1782                 ret = -ENOMEM;
1783                 goto out;
1784         }
1785
1786         if (it_iu_len > srp_max_message_size || it_iu_len < 64) {
1787                 rej->reason =
1788                     cpu_to_be32(SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
1789                 ret = -EINVAL;
1790                 PRINT_ERROR("rejected SRP_LOGIN_REQ because its"
1791                             " length (%d bytes) is out of range (%d .. %d)",
1792                             it_iu_len, 64, srp_max_message_size);
1793                 goto reject;
1794         }
1795
1796         if ((req->req_flags & 0x3) == SRP_MULTICHAN_SINGLE) {
1797                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;
1798
1799                 spin_lock_irq(&sdev->spinlock);
1800
1801                 list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
1802                         if (!memcmp(ch->i_port_id, req->initiator_port_id, 16)
1803                             && !memcmp(ch->t_port_id, req->target_port_id, 16)
1804                             && param->port == ch->sport->port
1805                             && param->listen_id == ch->sport->sdev->cm_id
1806                             && ch->cm_id) {
1807                                 enum rdma_ch_state prev_state;
1808
1809                                 /* found an existing channel */
1810                                 TRACE_DBG("Found existing channel name= %s"
1811                                           " cm_id= %p state= %d",
1812                                           ch->sess_name, ch->cm_id,
1813                                           atomic_read(&ch->state));
1814
1815                                 prev_state
1816                                 = srpt_test_and_set_channel_state(ch,
1817                                         RDMA_CHANNEL_LIVE,
1818                                         RDMA_CHANNEL_DISCONNECTING);
1819                                 if (prev_state == RDMA_CHANNEL_CONNECTING)
1820                                         list_del(&ch->list);
1821
1822                                 spin_unlock_irq(&sdev->spinlock);
1823
1824                                 rsp->rsp_flags =
1825                                         SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
1826
1827                                 if (prev_state == RDMA_CHANNEL_LIVE) {
1828                                         ib_send_cm_dreq(ch->cm_id, NULL, 0);
1829                                         PRINT_INFO("disconnected"
1830                                           " session %s because a new"
1831                                           " SRP_LOGIN_REQ has been received.",
1832                                           ch->sess_name);
1833                                 } else if (prev_state ==
1834                                          RDMA_CHANNEL_CONNECTING) {
1835                                         PRINT_ERROR("%s", "rejected"
1836                                           " SRP_LOGIN_REQ because another login"
1837                                           " request is being processed.");
1838                                         ib_send_cm_rej(ch->cm_id,
1839                                                        IB_CM_REJ_NO_RESOURCES,
1840                                                        NULL, 0, NULL, 0);
1841                                         scst_unregister_session(ch->scst_sess,
1842                                                         0,
1843                                                         srpt_release_channel);
1844                                 }
1845
1846                                 spin_lock_irq(&sdev->spinlock);
1847                         }
1848                 }
1849
1850                 spin_unlock_irq(&sdev->spinlock);
1851
1852         } else
1853                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
1854
1855         if (((u64) (*(u64 *) req->target_port_id) !=
1856              cpu_to_be64(srpt_service_guid)) ||
1857             ((u64) (*(u64 *) (req->target_port_id + 8)) !=
1858              cpu_to_be64(srpt_service_guid))) {
1859                 rej->reason =
1860                     cpu_to_be32(SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
1861                 ret = -ENOMEM;
1862                 PRINT_ERROR("%s", "rejected SRP_LOGIN_REQ because it"
1863                        " has an invalid target port identifier.");
1864                 goto reject;
1865         }
1866
1867         ch = kzalloc(sizeof *ch, GFP_KERNEL);
1868         if (!ch) {
1869                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1870                 PRINT_ERROR("%s",
1871                             "rejected SRP_LOGIN_REQ because out of memory.");
1872                 ret = -ENOMEM;
1873                 goto reject;
1874         }
1875
1876         spin_lock_init(&ch->spinlock);
1877         memcpy(ch->i_port_id, req->initiator_port_id, 16);
1878         memcpy(ch->t_port_id, req->target_port_id, 16);
1879         ch->sport = &sdev->port[param->port - 1];
1880         ch->cm_id = cm_id;
1881         atomic_set(&ch->state, RDMA_CHANNEL_CONNECTING);
1882         INIT_LIST_HEAD(&ch->cmd_wait_list);
1883
1884         ret = srpt_create_ch_ib(ch);
1885         if (ret) {
1886                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1887                 PRINT_ERROR("%s", "rejected SRP_LOGIN_REQ because creating"
1888                             " a new RDMA channel failed.");
1889                 goto free_ch;
1890         }
1891
1892         ret = srpt_ch_qp_rtr(ch, ch->qp);
1893         if (ret) {
1894                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1895                 PRINT_ERROR("rejected SRP_LOGIN_REQ because enabling"
1896                        " RTR failed (error code = %d)", ret);
1897                 goto destroy_ib;
1898         }
1899
1900         if (use_port_guid_in_session_name) {
1901                 /*
1902                  * If the kernel module parameter use_port_guid_in_session_name
1903                  * has been specified, use a combination of the target port
1904                  * GUID and the initiator port ID as the session name. This
1905                  * was the original behavior of the SRP target implementation
1906                  * (i.e. before the SRPT was included in OFED 1.3).
1907                  */
1908                 snprintf(ch->sess_name, sizeof(ch->sess_name),
1909                          "0x%016llx%016llx",
1910                          (unsigned long long)be64_to_cpu(*(u64 *)
1911                                 &sdev->port[param->port - 1].gid.raw[8]),
1912                          (unsigned long long)be64_to_cpu(*(u64 *)
1913                                 (ch->i_port_id + 8)));
1914         } else {
1915                 /*
1916                  * Default behavior: use the initiator port identifier as the
1917                  * session name.
1918                  */
1919                 snprintf(ch->sess_name, sizeof(ch->sess_name),
1920                          "0x%016llx%016llx",
1921                          (unsigned long long)be64_to_cpu(*(u64 *)ch->i_port_id),
1922                          (unsigned long long)be64_to_cpu(*(u64 *)
1923                                  (ch->i_port_id + 8)));
1924         }
1925
1926         TRACE_DBG("registering session %s", ch->sess_name);
1927
1928         BUG_ON(!sdev->scst_tgt);
1929         ch->scst_sess = scst_register_session(sdev->scst_tgt, 0, ch->sess_name,
1930                                               NULL, NULL);
1931         if (!ch->scst_sess) {
1932                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1933                 TRACE_DBG("%s", "Failed to create scst sess");
1934                 goto destroy_ib;
1935         }
1936
1937         TRACE_DBG("Establish connection sess=%p name=%s cm_id=%p",
1938                   ch->scst_sess, ch->sess_name, ch->cm_id);
1939
1940         scst_sess_set_tgt_priv(ch->scst_sess, ch);
1941
1942         /* create srp_login_response */
1943         rsp->opcode = SRP_LOGIN_RSP;
1944         rsp->tag = req->tag;
1945         rsp->max_it_iu_len = req->req_it_iu_len;
1946         rsp->max_ti_iu_len = req->req_it_iu_len;
1947         rsp->buf_fmt =
1948             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
1949         rsp->req_lim_delta = cpu_to_be32(SRPT_RQ_SIZE);
1950         atomic_set(&ch->req_lim_delta, 0);
1951
1952         /* create cm reply */
1953         rep_param->qp_num = ch->qp->qp_num;
1954         rep_param->private_data = (void *)rsp;
1955         rep_param->private_data_len = sizeof *rsp;
1956         rep_param->rnr_retry_count = 7;
1957         rep_param->flow_control = 1;
1958         rep_param->failover_accepted = 0;
1959         rep_param->srq = 1;
1960         rep_param->responder_resources = 4;
1961         rep_param->initiator_depth = 4;
1962
1963         ret = ib_send_cm_rep(cm_id, rep_param);
1964         if (ret) {
1965                 PRINT_ERROR("sending SRP_LOGIN_REQ response failed"
1966                             " (error code = %d)", ret);
1967                 goto release_channel;
1968         }
1969
1970         spin_lock_irq(&sdev->spinlock);
1971         list_add_tail(&ch->list, &sdev->rch_list);
1972         spin_unlock_irq(&sdev->spinlock);
1973
1974         goto out;
1975
1976 release_channel:
1977         scst_unregister_session(ch->scst_sess, 0, NULL);
1978         ch->scst_sess = NULL;
1979
1980 destroy_ib:
1981         ib_destroy_qp(ch->qp);
1982         ib_destroy_cq(ch->cq);
1983
1984 free_ch:
1985         kfree(ch);
1986
1987 reject:
1988         rej->opcode = SRP_LOGIN_REJ;
1989         rej->tag = req->tag;
1990         rej->buf_fmt =
1991             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
1992
1993         ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
1994                              (void *)rej, sizeof *rej);
1995
1996 out:
1997         kfree(rep_param);
1998         kfree(rsp);
1999         kfree(rej);
2000
2001         return ret;
2002 }
2003
2004 static void srpt_cm_rej_recv(struct ib_cm_id *cm_id)
2005 {
2006         PRINT_INFO("Received InfiniBand REJ packet for cm_id %p.", cm_id);
2007         srpt_find_channel(cm_id, true);
2008 }
2009
2010 /**
2011  * Process an IB_CM_RTU_RECEIVED or IB_CM_USER_ESTABLISHED event.
2012  *
2013  * An IB_CM_RTU_RECEIVED message indicates that the connection is established
2014  * and that the recipient may begin transmitting (RTU = ready to use).
2015  */
2016 static void srpt_cm_rtu_recv(struct ib_cm_id *cm_id)
2017 {
2018         struct srpt_rdma_ch *ch;
2019         int ret;
2020
2021         ch = srpt_find_channel(cm_id, false);
2022         WARN_ON(!ch);
2023         if (!ch)
2024                 goto out;
2025
2026         if (srpt_test_and_set_channel_state(ch, RDMA_CHANNEL_CONNECTING,
2027                         RDMA_CHANNEL_LIVE) == RDMA_CHANNEL_CONNECTING) {
2028                 struct srpt_ioctx *ioctx, *ioctx_tmp;
2029
2030                 ret = srpt_ch_qp_rts(ch, ch->qp);
2031
2032                 list_for_each_entry_safe(ioctx, ioctx_tmp, &ch->cmd_wait_list,
2033                                          wait_list) {
2034                         list_del(&ioctx->wait_list);
2035                         srpt_handle_new_iu(ch, ioctx);
2036                 }
2037                 if (ret && srpt_test_and_set_channel_state(ch,
2038                         RDMA_CHANNEL_LIVE,
2039                         RDMA_CHANNEL_DISCONNECTING) == RDMA_CHANNEL_LIVE) {
2040                         TRACE_DBG("cm_id=%p sess_name=%s state=%d",
2041                                   cm_id, ch->sess_name,
2042                                   atomic_read(&ch->state));
2043                         ib_send_cm_dreq(ch->cm_id, NULL, 0);
2044                 }
2045         }
2046
2047 out:
2048         ;
2049 }
2050
2051 static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
2052 {
2053         PRINT_INFO("Received InfiniBand TimeWait exit for cm_id %p.", cm_id);
2054         srpt_find_channel(cm_id, true);
2055 }
2056
2057 static void srpt_cm_rep_error(struct ib_cm_id *cm_id)
2058 {
2059         PRINT_INFO("Received InfiniBand REP error for cm_id %p.", cm_id);
2060         srpt_find_channel(cm_id, true);
2061 }
2062
2063 static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
2064 {
2065         struct srpt_rdma_ch *ch;
2066
2067         ch = srpt_find_channel(cm_id, false);
2068         WARN_ON(!ch);
2069         if (!ch)
2070                 goto out;
2071
2072         TRACE_DBG("cm_id= %p ch->state= %d", cm_id, atomic_read(&ch->state));
2073
2074         switch (atomic_read(&ch->state)) {
2075         case RDMA_CHANNEL_LIVE:
2076         case RDMA_CHANNEL_CONNECTING:
2077                 ib_send_cm_drep(ch->cm_id, NULL, 0);
2078                 PRINT_INFO("Received DREQ and sent DREP for session %s.",
2079                            ch->sess_name);
2080                 break;
2081         case RDMA_CHANNEL_DISCONNECTING:
2082         default:
2083                 break;
2084         }
2085
2086 out:
2087         ;
2088 }
2089
2090 static void srpt_cm_drep_recv(struct ib_cm_id *cm_id)
2091 {
2092         PRINT_INFO("Received InfiniBand DREP message for cm_id %p.", cm_id);
2093         srpt_find_channel(cm_id, true);
2094 }
2095
2096 /**
2097  * IB connection manager callback function.
2098  *
2099  * A non-zero return value will cause the caller to destroy the CM ID.
2100  *
2101  * Note: srpt_cm_handler() must only return a non-zero value when the transfer
2102  * of cm_id ownership to a channel by srpt_cm_req_recv() failed. Returning
2103  * a non-zero value in any other case will trigger a race with the
2104  * ib_destroy_cm_id() call in srpt_release_channel().
2105  */
2106 static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2107 {
2108         int ret;
2109
2110         ret = 0;
2111         switch (event->event) {
2112         case IB_CM_REQ_RECEIVED:
2113                 ret = srpt_cm_req_recv(cm_id, &event->param.req_rcvd,
2114                                        event->private_data);
2115                 break;
2116         case IB_CM_REJ_RECEIVED:
2117                 srpt_cm_rej_recv(cm_id);
2118                 break;
2119         case IB_CM_RTU_RECEIVED:
2120         case IB_CM_USER_ESTABLISHED:
2121                 srpt_cm_rtu_recv(cm_id);
2122                 break;
2123         case IB_CM_DREQ_RECEIVED:
2124                 srpt_cm_dreq_recv(cm_id);
2125                 break;
2126         case IB_CM_DREP_RECEIVED:
2127                 srpt_cm_drep_recv(cm_id);
2128                 break;
2129         case IB_CM_TIMEWAIT_EXIT:
2130                 srpt_cm_timewait_exit(cm_id);
2131                 break;
2132         case IB_CM_REP_ERROR:
2133                 srpt_cm_rep_error(cm_id);
2134                 break;
2135         default:
2136                 break;
2137         }
2138
2139         return ret;
2140 }
2141
2142 static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
2143                                  struct srpt_ioctx *ioctx,
2144                                  struct scst_cmd *scmnd)
2145 {
2146         struct scatterlist *scat;
2147         scst_data_direction dir;
2148         struct rdma_iu *riu;
2149         struct srp_direct_buf *db;
2150         dma_addr_t dma_addr;
2151         struct ib_sge *sge;
2152         u64 raddr;
2153         u32 rsize;
2154         u32 tsize;
2155         u32 dma_len;
2156         int count, nrdma;
2157         int i, j, k;
2158
2159         scat = scst_cmd_get_sg(scmnd);
2160         dir = scst_cmd_get_data_direction(scmnd);
2161         WARN_ON(scat == NULL);
2162         count = ib_dma_map_sg(ch->sport->sdev->device, scat,
2163                               scst_cmd_get_sg_cnt(scmnd),
2164                               scst_to_tgt_dma_dir(dir));
2165         if (unlikely(!count))
2166                 return -EBUSY;
2167
2168         if (ioctx->rdma_ius && ioctx->n_rdma_ius)
2169                 nrdma = ioctx->n_rdma_ius;
2170         else {
2171                 nrdma = count / SRPT_DEF_SG_PER_WQE + ioctx->n_rbuf;
2172
2173                 ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu,
2174                                           scst_cmd_atomic(scmnd)
2175                                           ? GFP_ATOMIC : GFP_KERNEL);
2176                 if (!ioctx->rdma_ius) {
2177                         WARN_ON(scat == NULL);
2178                         ib_dma_unmap_sg(ch->sport->sdev->device,
2179                                         scat, scst_cmd_get_sg_cnt(scmnd),
2180                                         scst_to_tgt_dma_dir(dir));
2181                         return -ENOMEM;
2182                 }
2183
2184                 ioctx->n_rdma_ius = nrdma;
2185         }
2186
2187         db = ioctx->rbufs;
2188         tsize = (dir == SCST_DATA_READ) ?
2189                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
2190         dma_len = sg_dma_len(&scat[0]);
2191         riu = ioctx->rdma_ius;
2192
2193         /*
2194          * For each remote descriptor, calculate the number of ib_sge
2195          * entries needed. If at most SRPT_DEF_SG_PER_WQE ib_sge entries
2196          * are needed per RDMA operation, then one rdma_iu (and hence one
2197          * RDMA work request) per remote descriptor suffices; otherwise
2198          * extra rdma_iu's are allocated to carry the remaining ib_sge
2199          * entries in additional RDMA work requests.
2200          */
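        /*
         * Worked example with hypothetical numbers: if SRPT_DEF_SG_PER_WQE
         * were 16 and a single remote descriptor mapped to 20 scatterlist
         * entries, that descriptor would be split over two rdma_iu's, the
         * first carrying 16 ib_sge entries and the second the remaining 4,
         * resulting in two RDMA work requests for that descriptor.
         */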
2201         for (i = 0, j = 0;
2202              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
2203                 rsize = be32_to_cpu(db->len);
2204                 raddr = be64_to_cpu(db->va);
2205                 riu->raddr = raddr;
2206                 riu->rkey = be32_to_cpu(db->key);
2207                 riu->sge_cnt = 0;
2208
2209                 /* Calculate how many ib_sge entries this remote buffer requires. */
2210                 while (rsize > 0 && tsize > 0) {
2211
2212                         if (rsize >= dma_len) {
2213                                 tsize -= dma_len;
2214                                 rsize -= dma_len;
2215                                 raddr += dma_len;
2216
2217                                 if (tsize > 0) {
2218                                         ++j;
2219                                         if (j < count)
2220                                                 dma_len = sg_dma_len(&scat[j]);
2221                                 }
2222                         } else {
2223                                 tsize -= rsize;
2224                                 dma_len -= rsize;
2225                                 rsize = 0;
2226                         }
2227
2228                         ++riu->sge_cnt;
2229
2230                         if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) {
2231                                 ++ioctx->n_rdma;
2232                                 riu->sge =
2233                                     kmalloc(riu->sge_cnt * sizeof *riu->sge,
2234                                             scst_cmd_atomic(scmnd)
2235                                             ? GFP_ATOMIC : GFP_KERNEL);
2236                                 if (!riu->sge)
2237                                         goto free_mem;
2238
2239                                 ++riu;
2240                                 riu->sge_cnt = 0;
2241                                 riu->raddr = raddr;
2242                                 riu->rkey = be32_to_cpu(db->key);
2243                         }
2244                 }
2245
2246                 ++ioctx->n_rdma;
2247                 riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge,
2248                                    scst_cmd_atomic(scmnd)
2249                                    ? GFP_ATOMIC : GFP_KERNEL);
2250                 if (!riu->sge)
2251                         goto free_mem;
2252         }
2253
2254         db = ioctx->rbufs;
2255         scat = scst_cmd_get_sg(scmnd);
2256         tsize = (dir == SCST_DATA_READ) ?
2257                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
2258         riu = ioctx->rdma_ius;
2259         dma_len = sg_dma_len(&scat[0]);
2260         dma_addr = sg_dma_address(&scat[0]);
2261
2262         /* The second loop maps the scatterlist addresses to rdma_iu->ib_sge. */
2263         for (i = 0, j = 0;
2264              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
2265                 rsize = be32_to_cpu(db->len);
2266                 sge = riu->sge;
2267                 k = 0;
2268
2269                 while (rsize > 0 && tsize > 0) {
2270                         sge->addr = dma_addr;
2271                         sge->lkey = ch->sport->sdev->mr->lkey;
2272
2273                         if (rsize >= dma_len) {
2274                                 sge->length =
2275                                         (tsize < dma_len) ? tsize : dma_len;
2276                                 tsize -= dma_len;
2277                                 rsize -= dma_len;
2278
2279                                 if (tsize > 0) {
2280                                         ++j;
2281                                         if (j < count) {
2282                                                 dma_len = sg_dma_len(&scat[j]);
2283                                                 dma_addr =
2284                                                     sg_dma_address(&scat[j]);
2285                                         }
2286                                 }
2287                         } else {
2288                                 sge->length = (tsize < rsize) ? tsize : rsize;
2289                                 tsize -= rsize;
2290                                 dma_len -= rsize;
2291                                 dma_addr += rsize;
2292                                 rsize = 0;
2293                         }
2294
2295                         ++k;
2296                         if (k == riu->sge_cnt && rsize > 0) {
2297                                 ++riu;
2298                                 sge = riu->sge;
2299                                 k = 0;
2300                         } else if (rsize > 0)
2301                                 ++sge;
2302                 }
2303         }
2304
2305         return 0;
2306
2307 free_mem:
2308         srpt_unmap_sg_to_ib_sge(ch, ioctx, scmnd);
2309
2310         return -ENOMEM;
2311 }
2312
2313 static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
2314                                     struct srpt_ioctx *ioctx,
2315                                     struct scst_cmd *scmnd)
2316 {
2317         struct scatterlist *scat;
2318         scst_data_direction dir;
2319
2320         TRACE_ENTRY();
2321
2322         scat = scst_cmd_get_sg(scmnd);
2323
2324         TRACE_DBG("n_rdma = %d; rdma_ius = %p; scat = %p",
2325                   ioctx->n_rdma, ioctx->rdma_ius, scat);
2326
2327         BUG_ON(ioctx->n_rdma && !ioctx->rdma_ius);
2328
2329         while (ioctx->n_rdma)
2330                 kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge);
2331
2332         kfree(ioctx->rdma_ius);
2333         ioctx->rdma_ius = NULL;
2334
2335         if (scat) {
2336                 dir = scst_cmd_get_data_direction(scmnd);
2337                 ib_dma_unmap_sg(ch->sport->sdev->device,
2338                                 scat, scst_cmd_get_sg_cnt(scmnd),
2339                                 scst_to_tgt_dma_dir(dir));
2340         }
2341
2342         TRACE_EXIT();
2343 }
2344
2345 static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
2346                               scst_data_direction dir)
2347 {
2348         struct ib_send_wr wr;
2349         struct ib_send_wr *bad_wr;
2350         struct rdma_iu *riu;
2351         int i;
2352         int ret;
2353         int srq_wr_avail;
2354
2355         if (dir == SCST_DATA_WRITE) {
2356                 ret = -ENOMEM;
2357                 srq_wr_avail = atomic_sub_return(ioctx->n_rdma,
2358                                                  &ch->qp_wr_avail);
2359                 if (srq_wr_avail < 0) {
2360                         atomic_add(ioctx->n_rdma, &ch->qp_wr_avail);
2361                         PRINT_INFO("%s[%d]: SRQ full", __func__, __LINE__);
2362                         goto out;
2363                 }
2364         }
2365
2366         ret = 0;
2367         riu = ioctx->rdma_ius;
2368         memset(&wr, 0, sizeof wr);
2369
2370         for (i = 0; i < ioctx->n_rdma; ++i, ++riu) {
2371                 wr.opcode = (dir == SCST_DATA_READ) ?
2372                     IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
2373                 wr.next = NULL;
2374                 wr.wr_id = ioctx->index;
2375                 wr.wr.rdma.remote_addr = riu->raddr;
2376                 wr.wr.rdma.rkey = riu->rkey;
2377                 wr.num_sge = riu->sge_cnt;
2378                 wr.sg_list = riu->sge;
2379
2380                 /* only get completion event for the last rdma wr */
2381                 if (i == (ioctx->n_rdma - 1) && dir == SCST_DATA_WRITE)
2382                         wr.send_flags = IB_SEND_SIGNALED;
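                /*
                 * Since an RC QP completes work requests in posting order,
                 * the completion of this single signaled WR implies that
                 * all earlier unsignaled RDMA WRs on the same QP have
                 * finished too, which keeps completion processing cheap.
                 */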
2383
2384                 ret = ib_post_send(ch->qp, &wr, &bad_wr);
2385                 if (ret)
2386                         goto out;
2387         }
2388
2389 out:
2390         return ret;
2391 }
2392
2393 /*
2394  * Start data transfer between initiator and target. Must not block.
2395  */
2396 static int srpt_xfer_data(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
2397                           struct scst_cmd *scmnd)
2398 {
2399         int ret;
2400
2401         ret = srpt_map_sg_to_ib_sge(ch, ioctx, scmnd);
2402         if (ret) {
2403                 PRINT_ERROR("%s[%d] ret=%d", __func__, __LINE__, ret);
2404                 ret = SCST_TGT_RES_QUEUE_FULL;
2405                 goto out;
2406         }
2407
2408         ret = srpt_perform_rdmas(ch, ioctx, scst_cmd_get_data_direction(scmnd));
2409         if (ret) {
2410                 if (ret == -EAGAIN || ret == -ENOMEM) {
2411                         PRINT_INFO("%s[%d] queue full -- ret=%d",
2412                                    __func__, __LINE__, ret);
2413                         ret = SCST_TGT_RES_QUEUE_FULL;
2414                 } else {
2415                         PRINT_ERROR("%s[%d] fatal error -- ret=%d",
2416                                     __func__, __LINE__, ret);
2417                         ret = SCST_TGT_RES_FATAL_ERROR;
2418                 }
2419                 goto out_unmap;
2420         }
2421
2422         ret = SCST_TGT_RES_SUCCESS;
2423
2424 out:
2425         return ret;
2426 out_unmap:
2427         srpt_unmap_sg_to_ib_sge(ch, ioctx, scmnd);
2428         goto out;
2429 }
2430
2431 /*
2432  * Called by the SCST core to inform ib_srpt that data reception from the
2433  * initiator should start (SCST_DATA_WRITE). Must not block.
2434  */
2435 static int srpt_rdy_to_xfer(struct scst_cmd *scmnd)
2436 {
2437         struct srpt_rdma_ch *ch;
2438         struct srpt_ioctx *ioctx;
2439         enum rdma_ch_state ch_state;
2440         int ret;
2441
2442         ioctx = scst_cmd_get_tgt_priv(scmnd);
2443         BUG_ON(!ioctx);
2444
2445         WARN_ON(srpt_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA)
2446                 == SRPT_STATE_ABORTED);
2447
2448         ch = ioctx->ch;
2449         WARN_ON(ch != scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd)));
2450         BUG_ON(!ch);
2451
2452         ch_state = atomic_read(&ch->state);
2453         if (ch_state == RDMA_CHANNEL_DISCONNECTING) {
2454                 TRACE_DBG("cmd with tag %lld: channel disconnecting",
2455                           scst_cmd_get_tag(scmnd));
2456                 ret = SCST_TGT_RES_FATAL_ERROR;
2457                 goto out;
2458         } else if (ch_state == RDMA_CHANNEL_CONNECTING) {
2459                 ret = SCST_TGT_RES_QUEUE_FULL;
2460                 goto out;
2461         }
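        /*
         * Returning SCST_TGT_RES_QUEUE_FULL while the channel is still in
         * the connecting state makes the SCST core retry the data transfer
         * later, by which time the channel has hopefully become live.
         */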
2462         ret = srpt_xfer_data(ch, ioctx, scmnd);
2463
2464 out:
2465         return ret;
2466 }
2467
2468 /*
2469  * Called by the SCST core. Transmits the response buffer and status held in
2470  * 'scmnd'. Must not block.
2471  */
2472 static int srpt_xmit_response(struct scst_cmd *scmnd)
2473 {
2474         struct srpt_rdma_ch *ch;
2475         struct srpt_ioctx *ioctx;
2476         struct srp_rsp *srp_rsp;
2477         u64 tag;
2478         int ret = SCST_TGT_RES_SUCCESS;
2479         int dir;
2480         int status;
2481
2482         ioctx = scst_cmd_get_tgt_priv(scmnd);
2483         BUG_ON(!ioctx);
2484
2485         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2486         BUG_ON(!ch);
2487
2488         if (unlikely(scst_cmd_aborted(scmnd))) {
2489                 TRACE_DBG("cmd with tag %lld has been aborted",
2490                           scst_cmd_get_tag(scmnd));
2491                 srpt_abort_scst_cmd(ch->sport->sdev, scmnd);
2492                 ret = SCST_TGT_RES_SUCCESS;
2493                 goto out;
2494         }
2495
2496         if (srpt_set_cmd_state(ioctx, SRPT_STATE_PROCESSED)
2497             == SRPT_STATE_ABORTED) {
2498                 ret = SCST_TGT_RES_SUCCESS;
2499                 goto out;
2500         }
2501
2502         tag = scst_cmd_get_tag(scmnd);
2503         dir = scst_cmd_get_data_direction(scmnd);
2504         status = scst_cmd_get_status(scmnd) & 0xff;
2505
2506         srpt_build_cmd_rsp(ch, ioctx, NO_SENSE, NO_ADD_SENSE, tag);
2507
2508         srp_rsp = ioctx->buf;
2509
2510         if (SCST_SENSE_VALID(scst_cmd_get_sense_buffer(scmnd))) {
2511                 unsigned int max_sense_len;
2512
2513                 srp_rsp->sense_data_len = scst_cmd_get_sense_buffer_len(scmnd);
2514                 BUILD_BUG_ON(MIN_MAX_MESSAGE_SIZE <= sizeof(*srp_rsp));
2515                 WARN_ON(srp_max_message_size <= sizeof(*srp_rsp));
2516                 max_sense_len = srp_max_message_size - sizeof(*srp_rsp);
2517                 if (srp_rsp->sense_data_len > max_sense_len) {
2518                         PRINT_WARNING("truncated sense data from %d to %d"
2519                                 " bytes", srp_rsp->sense_data_len,
2520                                 max_sense_len);
2521                         srp_rsp->sense_data_len = max_sense_len;
2522                 }
2523
2524                 memcpy((u8 *) (srp_rsp + 1), scst_cmd_get_sense_buffer(scmnd),
2525                        srp_rsp->sense_data_len);
2526
2527                 srp_rsp->sense_data_len = cpu_to_be32(srp_rsp->sense_data_len);
2528                 srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
2529
2530                 if (!status)
2531                         status = SAM_STAT_CHECK_CONDITION;
2532         }
2533
2534         srp_rsp->status = status;
2535
2536         /* For read commands, transfer the data to the initiator. */
2537         if (dir == SCST_DATA_READ && scst_cmd_get_resp_data_len(scmnd)) {
2538                 ret = srpt_xfer_data(ch, ioctx, scmnd);
2539                 if (ret != SCST_TGT_RES_SUCCESS) {
2540                         PRINT_ERROR("%s: tag= %lld xfer_data failed",
2541                                     __func__, (unsigned long long)tag);
2542                         goto out;
2543                 }
2544         }
2545
2546         if (srpt_post_send(ch, ioctx,
2547                            sizeof *srp_rsp +
2548                            be32_to_cpu(srp_rsp->sense_data_len))) {
2549                 PRINT_ERROR("%s[%d]: ch->state= %d tag= %lld",
2550                             __func__, __LINE__, atomic_read(&ch->state),
2551                             (unsigned long long)tag);
2552                 ret = SCST_TGT_RES_FATAL_ERROR;
2553         }
2554
2555 out:
2556         return ret;
2557 }
2558
2559 /*
2560  * Called by the SCST core to inform ib_srpt that a received task management
2561  * function has been completed. Must not block.
2562  */
2563 static void srpt_tsk_mgmt_done(struct scst_mgmt_cmd *mcmnd)
2564 {
2565         struct srpt_rdma_ch *ch;
2566         struct srpt_mgmt_ioctx *mgmt_ioctx;
2567         struct srpt_ioctx *ioctx;
2568         int rsp_len;
2569
2570         mgmt_ioctx = scst_mgmt_cmd_get_tgt_priv(mcmnd);
2571         BUG_ON(!mgmt_ioctx);
2572
2573         ch = mgmt_ioctx->ch;
2574         BUG_ON(!ch);
2575
2576         ioctx = mgmt_ioctx->ioctx;
2577         BUG_ON(!ioctx);
2578
2579         TRACE_DBG("%s: tsk_mgmt_done for tag= %lld status=%d",
2580                   __func__, (unsigned long long)mgmt_ioctx->tag,
2581                   scst_mgmt_cmd_get_status(mcmnd));
2582
2583         if (srpt_set_cmd_state(ioctx, SRPT_STATE_PROCESSED)
2584             == SRPT_STATE_ABORTED)
2585                 goto out;
2586
2587         rsp_len = srpt_build_tskmgmt_rsp(ch, ioctx,
2588                                          (scst_mgmt_cmd_get_status(mcmnd) ==
2589                                           SCST_MGMT_STATUS_SUCCESS) ?
2590                                          SRP_TSK_MGMT_SUCCESS :
2591                                          SRP_TSK_MGMT_FAILED,
2592                                          mgmt_ioctx->tag);
2593         srpt_post_send(ch, ioctx, rsp_len);
2594
2595         scst_mgmt_cmd_set_tgt_priv(mcmnd, NULL);
2596
2597         kfree(mgmt_ioctx);
2598
2599 out:
2600         ;
2601 }
2602
2603 /*
2604  * Called by the SCST core to inform ib_srpt that the command 'scmnd' is about
2605  * to be freed. May be called in IRQ context.
2606  */
2607 static void srpt_on_free_cmd(struct scst_cmd *scmnd)
2608 {
2609         struct srpt_rdma_ch *ch;
2610         struct srpt_ioctx *ioctx;
2611
2612         ioctx = scst_cmd_get_tgt_priv(scmnd);
2613         BUG_ON(!ioctx);
2614
2615         srpt_set_cmd_state(ioctx, SRPT_STATE_ABORTED);
2616         ch = ioctx->ch;
2617         BUG_ON(!ch);
2618         ioctx->ch = NULL;
2619
2620         srpt_reset_ioctx(ch, ioctx);
2621         scst_cmd_set_tgt_priv(scmnd, NULL);
2622 }
2623
2624 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2625 /* A vanilla 2.6.19 or older kernel without backported OFED kernel headers. */
2626 static void srpt_refresh_port_work(void *ctx)
2627 #else
2628 static void srpt_refresh_port_work(struct work_struct *work)
2629 #endif
2630 {
2631 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2632         struct srpt_port *sport = (struct srpt_port *)ctx;
2633 #else
2634         struct srpt_port *sport = container_of(work, struct srpt_port, work);
2635 #endif
2636
2637         srpt_refresh_port(sport);
2638 }
2639
2640 /*
2641  * Called by the SCST core to detect target adapters. Returns the number of
2642  * detected target adapters.
2643  */
2644 static int srpt_detect(struct scst_tgt_template *tp)
2645 {
2646         int device_count;
2647
2648         TRACE_ENTRY();
2649
2650         device_count = atomic_read(&srpt_device_count);
2651
2652         TRACE_EXIT_RES(device_count);
2653
2654         return device_count;
2655 }
2656
2657 /*
2658  * Callback function called by the SCST core from scst_unregister() to free up
2659  * the resources associated with device scst_tgt.
2660  */
2661 static int srpt_release(struct scst_tgt *scst_tgt)
2662 {
2663         struct srpt_device *sdev = scst_tgt_get_tgt_priv(scst_tgt);
2664         struct srpt_rdma_ch *ch, *tmp_ch;
2665
2666         TRACE_ENTRY();
2667
2668         BUG_ON(!scst_tgt);
2669 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
2670         WARN_ON(!sdev);
2671         if (!sdev)
2672                 return -ENODEV;
2673 #else
2674         if (WARN_ON(!sdev))
2675                 return -ENODEV;
2676 #endif
2677
2678 #ifdef CONFIG_SCST_PROC
2679         srpt_unregister_procfs_entry(scst_tgt->tgtt);
2680 #endif /*CONFIG_SCST_PROC*/
2681
2682         spin_lock_irq(&sdev->spinlock);
2683         list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
2684                 list_del(&ch->list);
2685                 spin_unlock_irq(&sdev->spinlock);
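                /*
                 * The spinlock is released here because ib_send_cm_dreq()
                 * and scst_unregister_session() may sleep. The channel has
                 * already been removed from rch_list above, so no other
                 * code path can still look it up.
                 */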
2686                 ib_send_cm_dreq(ch->cm_id, NULL, 0);
2687                 scst_unregister_session(ch->scst_sess, true,
2688                                         srpt_release_channel);
2689                 spin_lock_irq(&sdev->spinlock);
2690         }
2691         spin_unlock_irq(&sdev->spinlock);
2692
2693         srpt_unregister_mad_agent(sdev);
2694
2695         scst_tgt_set_tgt_priv(scst_tgt, NULL);
2696
2697         TRACE_EXIT();
2698
2699         return 0;
2700 }
2701
2702 /*
2703  * Entry point for ib_srpt's kernel thread. This kernel thread is only created
2704  * when the module parameter 'thread' is not zero (the default is zero).
2705  * This thread processes the ioctx list srpt_thread.thread_ioctx_list.
2706  *
2707  * @pre thread != 0
2708  */
2709 static int srpt_ioctx_thread(void *arg)
2710 {
2711         struct srpt_ioctx *ioctx;
2712
2713         /* Hibernation / freezing of the SRPT kernel thread is not supported. */
2714         current->flags |= PF_NOFREEZE;
2715
2716         spin_lock_irq(&srpt_thread.thread_lock);
2717         while (!kthread_should_stop()) {
2718                 wait_queue_t wait;
2719                 init_waitqueue_entry(&wait, current);
2720
2721                 if (!srpt_test_ioctx_list()) {
2722                         add_wait_queue_exclusive(&ioctx_list_waitQ, &wait);
2723
2724                         for (;;) {
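                                /*
                                 * Setting the task state before re-testing
                                 * the ioctx list closes the race with the
                                 * wake_up() call in srpt_schedule_thread():
                                 * a wakeup that occurs in between resets the
                                 * task state to TASK_RUNNING, so schedule()
                                 * returns immediately instead of sleeping.
                                 */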
2725                                 set_current_state(TASK_INTERRUPTIBLE);
2726                                 if (srpt_test_ioctx_list())
2727                                         break;
2728                                 spin_unlock_irq(&srpt_thread.thread_lock);
2729                                 schedule();
2730                                 spin_lock_irq(&srpt_thread.thread_lock);
2731                         }
2732                         set_current_state(TASK_RUNNING);
2733                         remove_wait_queue(&ioctx_list_waitQ, &wait);
2734                 }
2735
2736                 while (!list_empty(&srpt_thread.thread_ioctx_list)) {
2737                         ioctx = list_entry(srpt_thread.thread_ioctx_list.next,
2738                                            struct srpt_ioctx, comp_list);
2739
2740                         list_del(&ioctx->comp_list);
2741
2742                         spin_unlock_irq(&srpt_thread.thread_lock);
2743                         switch (ioctx->op) {
2744                         case IB_WC_SEND:
2745                                 srpt_handle_send_comp(ioctx->ch, ioctx,
2746                                         SCST_CONTEXT_DIRECT);
2747                                 break;
2748                         case IB_WC_RDMA_WRITE:
2749                         case IB_WC_RDMA_READ:
2750                                 srpt_handle_rdma_comp(ioctx->ch, ioctx);
2751                                 break;
2752                         case IB_WC_RECV:
2753                                 srpt_handle_new_iu(ioctx->ch, ioctx);
2754                                 break;
2755                         default:
2756                                 break;
2757                         }
2758 #if defined(CONFIG_SCST_DEBUG)
2759                         if (thread_processing_delay_in_us
2760                             <= MAX_UDELAY_MS * 1000)
2761                                 udelay(thread_processing_delay_in_us);
2762 #endif
2763                         spin_lock_irq(&srpt_thread.thread_lock);
2764                 }
2765         }
2766         spin_unlock_irq(&srpt_thread.thread_lock);
2767
2768         return 0;
2769 }
2770
2771 /* SCST target template for the SRP target implementation. */
2772 static struct scst_tgt_template srpt_template = {
2773         .name = DRV_NAME,
2774         .sg_tablesize = SRPT_DEF_SG_TABLESIZE,
2775         .xmit_response_atomic = 1,
2776         .rdy_to_xfer_atomic = 1,
2777         .detect = srpt_detect,
2778         .release = srpt_release,
2779         .xmit_response = srpt_xmit_response,
2780         .rdy_to_xfer = srpt_rdy_to_xfer,
2781         .on_free_cmd = srpt_on_free_cmd,
2782         .task_mgmt_fn_done = srpt_tsk_mgmt_done
2783 };
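/*
 * Because xmit_response_atomic and rdy_to_xfer_atomic are set above, the
 * SCST core is allowed to invoke srpt_xmit_response() and srpt_rdy_to_xfer()
 * from atomic context. This is why these callbacks, and the functions they
 * call such as srpt_xfer_data(), must not block.
 */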
2784
2785 /*
2786  * The callback function srpt_release_class_dev() is called whenever a
2787  * device is removed from the /sys/class/infiniband_srpt device class.
2788  * Although this function has been left empty, a release function has been
2789  * defined such that upon module removal no complaint is logged about a
2790  * missing release function.
2791  */
2792 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2793 static void srpt_release_class_dev(struct class_device *class_dev)
2794 #else
2795 static void srpt_release_class_dev(struct device *dev)
2796 #endif
2797 {
2798 }
2799
2800 #ifdef CONFIG_SCST_PROC
2801
2802 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
2803 static int srpt_trace_level_show(struct seq_file *seq, void *v)
2804 {
2805         return scst_proc_log_entry_read(seq, trace_flag, NULL);
2806 }
2807
2808 static ssize_t srpt_proc_trace_level_write(struct file *file,
2809         const char __user *buf, size_t length, loff_t *off)
2810 {
2811         return scst_proc_log_entry_write(file, buf, length, &trace_flag,
2812                 DEFAULT_SRPT_TRACE_FLAGS, NULL);
2813 }
2814
2815 static struct scst_proc_data srpt_log_proc_data = {
2816         SCST_DEF_RW_SEQ_OP(srpt_proc_trace_level_write)
2817         .show = srpt_trace_level_show,
2818 };
2819 #endif
2820
2821 #endif /* CONFIG_SCST_PROC */
2822
2823 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2824 static ssize_t show_login_info(struct class_device *class_dev, char *buf)
2825 #else
2826 static ssize_t show_login_info(struct device *dev,
2827                                struct device_attribute *attr, char *buf)
2828 #endif
2829 {
2830         struct srpt_device *sdev =
2831 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2832                 container_of(class_dev, struct srpt_device, class_dev);
2833 #else
2834                 container_of(dev, struct srpt_device, dev);
2835 #endif
2836         struct srpt_port *sport;
2837         int i;
2838         int len = 0;
2839
2840         for (i = 0; i < sdev->device->phys_port_cnt; i++) {
2841                 sport = &sdev->port[i];
2842
2843                 len += sprintf(buf + len,
2844                                "tid_ext=%016llx,ioc_guid=%016llx,pkey=ffff,"
2845                                "dgid=%04x%04x%04x%04x%04x%04x%04x%04x,"
2846                                "service_id=%016llx\n",
2847                                (unsigned long long) srpt_service_guid,
2848                                (unsigned long long) srpt_service_guid,
2849                                be16_to_cpu(((__be16 *) sport->gid.raw)[0]),
2850                                be16_to_cpu(((__be16 *) sport->gid.raw)[1]),
2851                                be16_to_cpu(((__be16 *) sport->gid.raw)[2]),
2852                                be16_to_cpu(((__be16 *) sport->gid.raw)[3]),
2853                                be16_to_cpu(((__be16 *) sport->gid.raw)[4]),
2854                                be16_to_cpu(((__be16 *) sport->gid.raw)[5]),
2855                                be16_to_cpu(((__be16 *) sport->gid.raw)[6]),
2856                                be16_to_cpu(((__be16 *) sport->gid.raw)[7]),
2857                                (unsigned long long) srpt_service_guid);
2858         }
2859
2860         return len;
2861 }
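/*
 * The login_info attribute exposes, for each port, a parameter string with
 * the identifiers an initiator needs to log in to this target. As a
 * hypothetical example, a line such as
 *
 *   tid_ext=0002c90300a0b1c2,ioc_guid=0002c90300a0b1c2,pkey=ffff,
 *   dgid=fe800000000000000002c90300a0b1c3,service_id=0002c90300a0b1c2
 *
 * identifies the target port with GID fe80::0002:c903:00a0:b1c3 (values
 * shown here are made up for illustration).
 */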
2862
2863 static struct class_attribute srpt_class_attrs[] = {
2864         __ATTR_NULL,
2865 };
2866
2867 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2868 static struct class_device_attribute srpt_dev_attrs[] = {
2869 #else
2870 static struct device_attribute srpt_dev_attrs[] = {
2871 #endif
2872         __ATTR(login_info, S_IRUGO, show_login_info, NULL),
2873         __ATTR_NULL,
2874 };
2875
2876 static struct class srpt_class = {
2877         .name        = "infiniband_srpt",
2878 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2879         .release = srpt_release_class_dev,
2880 #else
2881         .dev_release = srpt_release_class_dev,
2882 #endif
2883         .class_attrs = srpt_class_attrs,
2884 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2885         .class_dev_attrs = srpt_dev_attrs,
2886 #else
2887         .dev_attrs   = srpt_dev_attrs,
2888 #endif
2889 };
2890
2891 /*
2892  * Callback function called by the InfiniBand core when an InfiniBand
2893  * device is added, and also once for each device already present during
2894  * the ib_register_client() call.
2895  */
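/*
 * For context, srpt_add_one() and srpt_remove_one() are hooked into the
 * InfiniBand core through the srpt_client structure declared earlier in
 * this file; with the ib_client API of this kernel generation, that
 * declaration presumably looks like the following sketch:
 *
 *   static struct ib_client srpt_client = {
 *           .name   = DRV_NAME,
 *           .add    = srpt_add_one,
 *           .remove = srpt_remove_one,
 *   };
 */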
2896 static void srpt_add_one(struct ib_device *device)
2897 {
2898         struct srpt_device *sdev;
2899         struct srpt_port *sport;
2900         struct ib_srq_init_attr srq_attr;
2901         int i;
2902
2903         TRACE_ENTRY();
2904
2905         TRACE_DBG("device = %p, device->dma_ops = %p", device, device->dma_ops);
2906
2907         sdev = kzalloc(sizeof *sdev, GFP_KERNEL);
2908         if (!sdev)
2909                 return;
2910
2911         sdev->device = device;
2912
2913 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2914         sdev->class_dev.class = &srpt_class;
2915         sdev->class_dev.dev = device->dma_device;
2916         snprintf(sdev->class_dev.class_id, BUS_ID_SIZE,
2917                  "srpt-%s", device->name);
2918 #else
2919         sdev->dev.class = &srpt_class;
2920         sdev->dev.parent = device->dma_device;
2921 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30)
2922         snprintf(sdev->dev.bus_id, BUS_ID_SIZE, "srpt-%s", device->name);
2923 #else
2924         dev_set_name(&sdev->dev, "srpt-%s", device->name);
2925 #endif
2926 #endif
2927
2928 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2929         if (class_device_register(&sdev->class_dev))
2930                 goto free_dev;
2931 #else
2932         if (device_register(&sdev->dev))
2933                 goto free_dev;
2934 #endif
2935
2936         if (ib_query_device(device, &sdev->dev_attr))
2937                 goto err_dev;
2938
2939         sdev->pd = ib_alloc_pd(device);
2940         if (IS_ERR(sdev->pd))
2941                 goto err_dev;
2942
2943         sdev->mr = ib_get_dma_mr(sdev->pd, IB_ACCESS_LOCAL_WRITE);
2944         if (IS_ERR(sdev->mr))
2945                 goto err_pd;
2946
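        /*
         * Use a single shared receive queue (SRQ) for all connections on
         * this HCA: every RDMA channel takes its receive buffers from this
         * one queue, whose depth is capped at the smaller of SRPT_SRQ_SIZE
         * and the maximum the device supports (dev_attr.max_srq_wr).
         */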
2947         srq_attr.event_handler = srpt_srq_event;
2948         srq_attr.srq_context = sdev;
2949         srq_attr.attr.max_wr = min(SRPT_SRQ_SIZE, sdev->dev_attr.max_srq_wr);
2950         srq_attr.attr.max_sge = 1;
2951         srq_attr.attr.srq_limit = 0;
2952
2953         sdev->srq = ib_create_srq(sdev->pd, &srq_attr);
2954         if (IS_ERR(sdev->srq))
2955                 goto err_mr;
2956
2957         TRACE_DBG("%s: created SRQ with max_wr=%d (max allowed %d), dev=%s",
2958                   __func__, srq_attr.attr.max_wr,
2959                   sdev->dev_attr.max_srq_wr, device->name);
2960
2961         if (!srpt_service_guid)
2962                 srpt_service_guid = be64_to_cpu(device->node_guid);
2963
2964         sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev);
2965         if (IS_ERR(sdev->cm_id))
2966                 goto err_srq;
2967
2968         /* print out target login information */
2969         TRACE_DBG("Target login info: id_ext=%016llx,"
2970                   "ioc_guid=%016llx,pkey=ffff,service_id=%016llx",
2971                   (unsigned long long) srpt_service_guid,
2972                   (unsigned long long) srpt_service_guid,
2973                   (unsigned long long) srpt_service_guid);
2974
2975         /*
2976          * We do not have a consistent service_id (i.e. id_ext of target_id)
2977          * to identify this target. We currently use the GUID of the first HCA
2978          * in the system as service_id; therefore the target_id will change
2979          * if this HCA goes bad and is replaced by a different HCA.
2980          */
2981         if (ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid), 0, NULL))
2982                 goto err_cm;
2983
2984         INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device,
2985                               srpt_event_handler);
2986         if (ib_register_event_handler(&sdev->event_handler))
2987                 goto err_cm;
2988
2989         if (srpt_alloc_ioctx_ring(sdev))
2990                 goto err_event;
2991
2992         INIT_LIST_HEAD(&sdev->rch_list);
2993         spin_lock_init(&sdev->spinlock);
2994
2995         for (i = 0; i < SRPT_SRQ_SIZE; ++i)
2996                 srpt_post_recv(sdev, sdev->ioctx_ring[i]);
2997
2998         ib_set_client_data(device, &srpt_client, sdev);
2999
3000         sdev->scst_tgt = scst_register(&srpt_template, NULL);
3001         if (!sdev->scst_tgt) {
3002                 PRINT_ERROR("SCST registration failed for %s.",
3003                             sdev->device->name);
3004                 goto err_ring;
3005         }
3006
3007         scst_tgt_set_tgt_priv(sdev->scst_tgt, sdev);
3008
3009         WARN_ON(sdev->device->phys_port_cnt >
3010                 ARRAY_SIZE(sdev->port));
3011
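        /*
         * Set up one srpt_port structure per physical HCA port.
         * srpt_refresh_port() registers a MAD agent for each port, which is
         * why its failure is reported below as a MAD registration failure.
         */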
3012         for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
3013                 sport = &sdev->port[i - 1];
3014                 sport->sdev = sdev;
3015                 sport->port = i;
3016 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
3017                 /*
3018                  * A vanilla 2.6.19 or older kernel without backported OFED
3019                  * kernel headers.
3020                  */
3021                 INIT_WORK(&sport->work, srpt_refresh_port_work, sport);
3022 #else
3023                 INIT_WORK(&sport->work, srpt_refresh_port_work);
3024 #endif
3025                 if (srpt_refresh_port(sport)) {
3026                         PRINT_ERROR("MAD registration failed for %s-%d.",
3027                                     sdev->device->name, i);
3028                         goto err_refresh_port;
3029                 }
3030         }
3031
3032         atomic_inc(&srpt_device_count);
3033
3034         TRACE_EXIT();
3035
3036         return;
3037
3038 err_refresh_port:
3039         scst_unregister(sdev->scst_tgt);
3040 err_ring:
3041         ib_set_client_data(device, &srpt_client, NULL);
3042         srpt_free_ioctx_ring(sdev);
3043 err_event:
3044         ib_unregister_event_handler(&sdev->event_handler);
3045 err_cm:
3046         ib_destroy_cm_id(sdev->cm_id);
3047 err_srq:
3048         ib_destroy_srq(sdev->srq);
3049 err_mr:
3050         ib_dereg_mr(sdev->mr);
3051 err_pd:
3052         ib_dealloc_pd(sdev->pd);
3053 err_dev:
3054 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
3055         class_device_unregister(&sdev->class_dev);
3056 #else
3057         device_unregister(&sdev->dev);
3058 #endif
3059 free_dev:
3060         kfree(sdev);
3061
3062         TRACE_EXIT();
3063 }
3064
3065 /*
3066  * Callback function called by the InfiniBand core when an InfiniBand
3067  * device is removed, and also once for each registered device during
3068  * the ib_unregister_client() call.
3069  */
3070 static void srpt_remove_one(struct ib_device *device)
3071 {
3072         int i;
3073         struct srpt_device *sdev;
3074
3075         TRACE_ENTRY();
3076
3077         sdev = ib_get_client_data(device, &srpt_client);
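        /*
         * Two variants of the NULL check follow because on kernels up to
         * and including 2.6.18 WARN_ON() is a statement that does not
         * return the value of the tested condition, so it cannot be used
         * inside an if-expression there.
         */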
3078 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
3079         WARN_ON(!sdev);
3080         if (!sdev)
3081                 return;
3082 #else
3083         if (WARN_ON(!sdev))
3084                 return;
3085 #endif
3086
3087         /*
3088          * Cancel the work if it is queued, and wait until
3089          * srpt_refresh_port_work() has finished if it is currently running.
3090          */
3091         for (i = 0; i < sdev->device->phys_port_cnt; i++)
3092 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 22)
3093                 cancel_work_sync(&sdev->port[i].work);
3094 #else
3095                 /*
3096                  * cancel_work_sync() was introduced in kernel 2.6.22; older
3097                  * kernels lack a way to cancel scheduled work synchronously.
3098                  */
3099                 PRINT_ERROR("%s",
3100                        "your kernel does not provide cancel_work_sync().");
3101 #endif
3102
3103         scst_unregister(sdev->scst_tgt);
3104         sdev->scst_tgt = NULL;
3105
3106         ib_unregister_event_handler(&sdev->event_handler);
3107         ib_destroy_cm_id(sdev->cm_id);
3108         ib_destroy_srq(sdev->srq);
3109         ib_dereg_mr(sdev->mr);
3110         ib_dealloc_pd(sdev->pd);
3111 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
3112         class_device_unregister(&sdev->class_dev);
3113 #else
3114         device_unregister(&sdev->dev);
3115 #endif
3116
3117         srpt_free_ioctx_ring(sdev);
3118         kfree(sdev);
3119
3120         TRACE_EXIT();
3121 }
3122
3123 #ifdef CONFIG_SCST_PROC
3124
3125 /*
3126  * Create procfs entries for srpt. Currently the only procfs entry created
3127  * by this function is the "trace_level" entry.
3128  */
3129 static int srpt_register_procfs_entry(struct scst_tgt_template *tgt)
3130 {
3131         int res = 0;
3132 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
3133         struct proc_dir_entry *p, *root;
3134
3135         root = scst_proc_get_tgt_root(tgt);
3136         WARN_ON(!root);
3137         if (root) {
3138                 /*
3139                  * Fill in the scst_proc_data::data pointer, which is used in
3140                  * a printk(KERN_INFO ...) statement in
3141                  * scst_proc_log_entry_write() in scst_proc.c.
3142                  */
3143                 srpt_log_proc_data.data = (char *)tgt->name;
3144                 p = scst_create_proc_entry(root, SRPT_PROC_TRACE_LEVEL_NAME,
3145                                            &srpt_log_proc_data);
3146                 if (!p)
3147                         res = -ENOMEM;
3148         } else
3149                 res = -ENOMEM;
3150
3151 #endif
3152         return res;
3153 }
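
/*
 * The entry created above can be read to inspect, and written to modify,
 * the active trace flags at runtime. Assuming the usual SCST proc layout,
 * usage would look like this (the "all" token is one of the keywords
 * accepted by scst_proc_log_entry_write()):
 *
 *   cat /proc/scsi_tgt/ib_srpt/trace_level
 *   echo "all" > /proc/scsi_tgt/ib_srpt/trace_level
 */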
3154
3155 static void srpt_unregister_procfs_entry(struct scst_tgt_template *tgt)
3156 {
3157 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
3158         struct proc_dir_entry *root;
3159
3160         root = scst_proc_get_tgt_root(tgt);
3161         WARN_ON(!root);
3162         if (root)
3163                 remove_proc_entry(SRPT_PROC_TRACE_LEVEL_NAME, root);
3164 #endif
3165 }
3166
3167 #endif /* CONFIG_SCST_PROC */
3168
3169 /*
3170  * Module initialization.
3171  *
3172  * Note: since ib_register_client() registers callback functions, and since at
3173  * least one of these callback functions (srpt_add_one()) calls SCST functions,
3174  * the SCST target template must be registered before ib_register_client() is
3175  * called.
3176  */
3177 static int __init srpt_init_module(void)
3178 {
3179         int ret;
3180
3181         ret = -EINVAL;
3182         if (srp_max_message_size < MIN_MAX_MESSAGE_SIZE) {
3183                 PRINT_ERROR("invalid value %d for kernel module parameter"
3184                             " srp_max_message_size -- must be at least %d.",
3185                             srp_max_message_size,
3186                             MIN_MAX_MESSAGE_SIZE);
3187                 goto out;
3188         }
3189
3190         ret = class_register(&srpt_class);
3191         if (ret) {
3192                 PRINT_ERROR("%s", "couldn't register class infiniband_srpt");
3193                 goto out;
3194         }
3195
3196         ret = scst_register_target_template(&srpt_template);
3197         if (ret < 0) {
3198                 PRINT_ERROR("%s", "couldn't register with SCST");
3199                 ret = -ENODEV;
3200                 goto out_unregister_class;
3201         }
3202
3203 #ifdef CONFIG_SCST_PROC
3204         ret = srpt_register_procfs_entry(&srpt_template);
3205         if (ret) {
3206                 PRINT_ERROR("%s", "couldn't register procfs entry");
3207                 goto out_unregister_target;
3208         }
3209 #endif /* CONFIG_SCST_PROC */
3210
3211         ret = ib_register_client(&srpt_client);
3212         if (ret) {
3213                 PRINT_ERROR("%s", "couldn't register IB client");
3214                 goto out_unregister_target;
3215         }
3216
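        /*
         * When the "thread" module parameter is nonzero, I/O contexts are
         * handed off to a dedicated kernel thread. If that thread cannot be
         * started, fall back to thread = 0, i.e. presumably to processing
         * completions directly in the completion-handler context.
         */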
3217         if (thread) {
3218                 spin_lock_init(&srpt_thread.thread_lock);
3219                 INIT_LIST_HEAD(&srpt_thread.thread_ioctx_list);
3220                 srpt_thread.thread = kthread_run(srpt_ioctx_thread,
3221                                                  NULL, "srpt_thread");
3222                 if (IS_ERR(srpt_thread.thread)) {
3223                         srpt_thread.thread = NULL;
3224                         thread = 0;
3225                 }
3226         }
3227
3228         return 0;
3229
3230 out_unregister_target:
3231 #ifdef CONFIG_SCST_PROC
3232         /*
3233          * Note: the procfs entry is unregistered in srpt_release(), which is
3234          * called by scst_unregister_target_template().
3235          */
3236 #endif /* CONFIG_SCST_PROC */
3237         scst_unregister_target_template(&srpt_template);
3238 out_unregister_class:
3239         class_unregister(&srpt_class);
3240 out:
3241         return ret;
3242 }
3243
3244 static void __exit srpt_cleanup_module(void)
3245 {
3246         TRACE_ENTRY();
3247
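        /*
         * Tear down in the reverse order of srpt_init_module(): stop the
         * optional kernel thread first, then unregister the IB client
         * (which invokes srpt_remove_one() for each device), the SCST
         * target template, and finally the device class.
         */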
3248         if (srpt_thread.thread)
3249                 kthread_stop(srpt_thread.thread);
3250         ib_unregister_client(&srpt_client);
3251         scst_unregister_target_template(&srpt_template);
3252         class_unregister(&srpt_class);
3253
3254         TRACE_EXIT();
3255 }
3256
3257 module_init(srpt_init_module);
3258 module_exit(srpt_cleanup_module);