/*
 * Copyright (c) 2006 - 2009 Mellanox Technology Inc.  All rights reserved.
 * Copyright (C) 2008 Vladislav Bolkhovitin <vst@vlnb.net>
 * Copyright (C) 2008 - 2009 Bart Van Assche <bart.vanassche@gmail.com>
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#include <asm/atomic.h>
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#endif
#include "ib_srpt.h"
#define LOG_PREFIX "ib_srpt" /* Prefix for SCST tracing macros. */
#include "scst_debug.h"

#define CONFIG_SCST_PROC

/* Name of this kernel module. */
#define DRV_NAME                "ib_srpt"
#define DRV_VERSION             "1.0.1"
#define DRV_RELDATE             "July 10, 2008"
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
/* Flags to be used in SCST debug tracing statements. */
#define DEFAULT_SRPT_TRACE_FLAGS (TRACE_OUT_OF_MEM | TRACE_MINOR \
                                  | TRACE_MGMT | TRACE_SPECIAL)
/* Name of the entry that will be created under /proc/scsi_tgt/ib_srpt. */
#define SRPT_PROC_TRACE_LEVEL_NAME      "trace_level"
#endif

#define MELLANOX_SRPT_ID_STRING "SCST SRP target"

MODULE_AUTHOR("Vu Pham");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol target "
                   "v" DRV_VERSION " (" DRV_RELDATE ")");
MODULE_LICENSE("Dual BSD/GPL");

struct srpt_thread {
        /* Protects thread_ioctx_list. */
        spinlock_t thread_lock;
        /* I/O contexts to be processed by the kernel thread. */
        struct list_head thread_ioctx_list;
        /* SRPT kernel thread. */
        struct task_struct *thread;
};

/*
 * Global Variables
 */

static u64 srpt_service_guid;
/* Number of srpt_device structures, i.e. of registered HCAs. */
static atomic_t srpt_device_count;
static int use_port_guid_in_session_name;
static int thread = 1;
static struct srpt_thread srpt_thread;
static DECLARE_WAIT_QUEUE_HEAD(ioctx_list_waitQ);
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
static unsigned long trace_flag = DEFAULT_SRPT_TRACE_FLAGS;
module_param(trace_flag, long, 0644);
MODULE_PARM_DESC(trace_flag,
                 "Trace flags for the ib_srpt kernel module.");
#endif
#if defined(CONFIG_SCST_DEBUG)
static unsigned long interrupt_processing_delay_in_us;
module_param(interrupt_processing_delay_in_us, long, 0744);
MODULE_PARM_DESC(interrupt_processing_delay_in_us,
                 "CQ completion handler interrupt delay in microseconds.");
static unsigned long thread_processing_delay_in_us;
module_param(thread_processing_delay_in_us, long, 0744);
MODULE_PARM_DESC(thread_processing_delay_in_us,
                 "SRP thread processing delay in microseconds.");
#endif

module_param(thread, int, 0444);
MODULE_PARM_DESC(thread,
                 "Execute ioctx in thread context. Default 1; 0 means soft "
                 "IRQ context where possible.");

static unsigned int srp_max_rdma_size = DEFAULT_MAX_RDMA_SIZE;
module_param(srp_max_rdma_size, int, 0744);
MODULE_PARM_DESC(srp_max_rdma_size,
                 "Maximum size of SRP RDMA transfers for new connections.");

static unsigned int srp_max_message_size = DEFAULT_MAX_MESSAGE_SIZE;
module_param(srp_max_message_size, int, 0444);
MODULE_PARM_DESC(srp_max_message_size,
                 "Maximum size of SRP control messages in bytes.");

module_param(use_port_guid_in_session_name, bool, 0444);
MODULE_PARM_DESC(use_port_guid_in_session_name,
                 "Use target port ID in the SCST session name such that"
                 " redundant paths between multiport systems can be masked.");

static void srpt_add_one(struct ib_device *device);
static void srpt_remove_one(struct ib_device *device);
static void srpt_unregister_mad_agent(struct srpt_device *sdev);
#ifdef CONFIG_SCST_PROC
static void srpt_unregister_procfs_entry(struct scst_tgt_template *tgt);
#endif /*CONFIG_SCST_PROC*/
static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
                                    struct srpt_ioctx *ioctx);
static void srpt_release_channel(struct scst_session *scst_sess);

static struct ib_client srpt_client = {
        .name = DRV_NAME,
        .add = srpt_add_one,
        .remove = srpt_remove_one
};

/**
 * Atomically test and set the channel state.
 * @ch: RDMA channel.
 * @old: channel state to compare with.
 * @new: state to change the channel state to if the current state matches the
 *       argument 'old'.
 *
 * Returns the previous channel state.
 */
static enum rdma_ch_state
srpt_test_and_set_channel_state(struct srpt_rdma_ch *ch,
                                enum rdma_ch_state old,
                                enum rdma_ch_state new)
{
        return atomic_cmpxchg(&ch->state, old, new);
}
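
/*
 * Example (illustrative, not called by the driver): because the helper above
 * is a compare-and-swap, a caller can both attempt a state transition and
 * detect a lost race against another context in a single step:
 *
 *   if (srpt_test_and_set_channel_state(ch, RDMA_CHANNEL_CONNECTING,
 *           RDMA_CHANNEL_LIVE) != RDMA_CHANNEL_CONNECTING)
 *           handle_lost_race();  // hypothetical error path
 */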

/*
 * Callback function called by the InfiniBand core when an asynchronous IB
 * event occurs. This callback may occur in interrupt context. See also
 * section 11.5.2, Set Asynchronous Event Handler in the InfiniBand
 * Architecture Specification.
 */
static void srpt_event_handler(struct ib_event_handler *handler,
                               struct ib_event *event)
{
        struct srpt_device *sdev;
        struct srpt_port *sport;

        TRACE_ENTRY();

        sdev = ib_get_client_data(event->device, &srpt_client);
        if (!sdev || sdev->device != event->device)
                return;

        TRACE_DBG("ASYNC event= %d on device= %s",
                  event->event, sdev->device->name);

        switch (event->event) {
        case IB_EVENT_PORT_ERR:
                if (event->element.port_num <= sdev->device->phys_port_cnt) {
                        sport = &sdev->port[event->element.port_num - 1];
                        sport->lid = 0;
                        sport->sm_lid = 0;
                }
                break;
        case IB_EVENT_PORT_ACTIVE:
        case IB_EVENT_LID_CHANGE:
        case IB_EVENT_PKEY_CHANGE:
        case IB_EVENT_SM_CHANGE:
        case IB_EVENT_CLIENT_REREGISTER:
                /*
                 * Refresh port data asynchronously. Note: it is safe to call
                 * schedule_work() even if &sport->work is already on the
                 * global workqueue because schedule_work() tests for the
                 * work_pending() condition before adding &sport->work to the
                 * global work queue.
                 */
                if (event->element.port_num <= sdev->device->phys_port_cnt) {
                        sport = &sdev->port[event->element.port_num - 1];
                        if (!sport->lid && !sport->sm_lid)
                                schedule_work(&sport->work);
                }
                break;
        default:
                break;
        }

        TRACE_EXIT();
}

/*
 * Callback function called by the InfiniBand core for SRQ (shared receive
 * queue) events.
 */
static void srpt_srq_event(struct ib_event *event, void *ctx)
{
        TRACE_ENTRY();

        TRACE_DBG("SRQ event %d", event->event);

        TRACE_EXIT();
}

/*
 * Callback function called by the InfiniBand core for QP (queue pair) events.
 */
static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch)
{
        TRACE_DBG("QP event %d on cm_id=%p sess_name=%s state=%d",
                  event->event, ch->cm_id, ch->sess_name,
                  atomic_read(&ch->state));

        switch (event->event) {
        case IB_EVENT_COMM_EST:
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20) || defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
                ib_cm_notify(ch->cm_id, event->event);
#else
                /* Vanilla 2.6.19 kernel (or older) without OFED. */
                PRINT_ERROR("%s", "ib_cm_notify() is not available on vanilla"
                            " kernels older than 2.6.20.");
#endif
                break;
        case IB_EVENT_QP_LAST_WQE_REACHED:
                if (srpt_test_and_set_channel_state(ch, RDMA_CHANNEL_LIVE,
                        RDMA_CHANNEL_DISCONNECTING) == RDMA_CHANNEL_LIVE) {
                        PRINT_INFO("disconnected session %s.", ch->sess_name);
                        ib_send_cm_dreq(ch->cm_id, NULL, 0);
                }
                break;
        default:
                break;
        }
}

/*
 * Helper function for filling in an InfiniBand IOUnitInfo structure. Stores
 * the lowest four bits of 'value' in element 'slot' of the array of four-bit
 * elements c_list (controller list). The index 'slot' is one-based.
 *
 * @pre 1 <= slot && 0 <= value && value < 16
 */
static void srpt_set_ioc(u8 *c_list, u32 slot, u8 value)
{
        u16 id;
        u8 tmp;

        id = (slot - 1) / 2;
        if (slot & 0x1) {
                tmp = c_list[id] & 0xf;
                c_list[id] = (value << 4) | tmp;
        } else {
                tmp = c_list[id] & 0xf0;
                c_list[id] = (value & 0xf) | tmp;
        }
}
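
/*
 * Example (illustrative): two consecutive one-based slots share one byte of
 * c_list. srpt_set_ioc(c_list, 1, 0xc) stores 0xc in the high nibble of
 * c_list[0] and srpt_set_ioc(c_list, 2, 0x3) stores 0x3 in the low nibble,
 * so after both calls c_list[0] == 0xc3.
 */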

/*
 * Write InfiniBand ClassPortInfo to mad. See also section 16.3.3.1
 * ClassPortInfo in the InfiniBand Architecture Specification.
 */
static void srpt_get_class_port_info(struct ib_dm_mad *mad)
{
        struct ib_class_port_info *cif;

        cif = (struct ib_class_port_info *)mad->data;
        memset(cif, 0, sizeof *cif);
        cif->base_version = 1;
        cif->class_version = 1;
        cif->resp_time_value = 20;

        mad->mad_hdr.status = 0;
}

/*
 * Write IOUnitInfo to mad. See also section 16.3.3.3 IOUnitInfo in the
 * InfiniBand Architecture Specification. See also section B.7,
 * table B.6 in the T10 SRP r16a document.
 */
static void srpt_get_iou(struct ib_dm_mad *mad)
{
        struct ib_dm_iou_info *ioui;
        u8 slot;
        int i;

        ioui = (struct ib_dm_iou_info *)mad->data;
        ioui->change_id = 1;
        ioui->max_controllers = 16;

        /* set present for slot 1 and empty for the rest */
        srpt_set_ioc(ioui->controller_list, 1, 1);
        for (i = 1, slot = 2; i < 16; i++, slot++)
                srpt_set_ioc(ioui->controller_list, slot, 0);

        mad->mad_hdr.status = 0;
}

/*
 * Write IOControllerProfile to mad for I/O controller (sdev, slot). See also
 * section 16.3.3.4 IOControllerProfile in the InfiniBand Architecture
 * Specification. See also section B.7, table B.7 in the T10 SRP r16a
 * document.
 */
static void srpt_get_ioc(struct srpt_device *sdev, u32 slot,
                         struct ib_dm_mad *mad)
{
        struct ib_dm_ioc_profile *iocp;

        iocp = (struct ib_dm_ioc_profile *)mad->data;

        if (!slot || slot > 16) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
                return;
        }

        if (slot > 2) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
                return;
        }

        memset(iocp, 0, sizeof *iocp);
        strcpy(iocp->id_string, MELLANOX_SRPT_ID_STRING);
        iocp->guid = cpu_to_be64(srpt_service_guid);
        iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
        iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id);
        iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver);
        iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
        iocp->subsys_device_id = 0x0;
        iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
        iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
        iocp->protocol = cpu_to_be16(SRP_PROTOCOL);
        iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION);
        iocp->send_queue_depth = cpu_to_be16(SRPT_SRQ_SIZE);
        iocp->rdma_read_depth = 4;
        iocp->send_size = cpu_to_be32(srp_max_message_size);
        iocp->rdma_size = cpu_to_be32(min(max(srp_max_rdma_size, 256U),
                                          1U << 24));
        iocp->num_svc_entries = 1;
        iocp->op_cap_mask = SRP_SEND_TO_IOC | SRP_SEND_FROM_IOC |
                SRP_RDMA_READ_FROM_IOC | SRP_RDMA_WRITE_FROM_IOC;

        mad->mad_hdr.status = 0;
}

/*
 * Device management: write ServiceEntries to mad for the given slot. See also
 * section 16.3.3.5 ServiceEntries in the InfiniBand Architecture
 * Specification. See also section B.7, table B.8 in the T10 SRP r16a document.
 */
static void srpt_get_svc_entries(u64 ioc_guid,
                                 u16 slot, u8 hi, u8 lo, struct ib_dm_mad *mad)
{
        struct ib_dm_svc_entries *svc_entries;

        WARN_ON(!ioc_guid);

        if (!slot || slot > 16) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
                return;
        }

        if (slot > 2 || lo > hi || hi > 1) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
                return;
        }

        svc_entries = (struct ib_dm_svc_entries *)mad->data;
        memset(svc_entries, 0, sizeof *svc_entries);
        svc_entries->service_entries[0].id = cpu_to_be64(ioc_guid);
        snprintf(svc_entries->service_entries[0].name,
                 sizeof(svc_entries->service_entries[0].name),
                 "%s%016llx",
                 SRP_SERVICE_NAME_PREFIX,
                 (unsigned long long)ioc_guid);

        mad->mad_hdr.status = 0;
}
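
/*
 * Example (illustrative): assuming SRP_SERVICE_NAME_PREFIX is "SRP.T10:"
 * (its usual definition in the SRP header), an ioc_guid of
 * 0x0002c90300fad870 yields the service name "SRP.T10:0002c90300fad870"
 * because of the "%s%016llx" format used above.
 */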

/*
 * Process a received InfiniBand management datagram (MAD) *rq_mad that
 * arrived through source port *sp. The response to be sent back is written
 * to *rsp_mad.
 */
static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
                                 struct ib_dm_mad *rsp_mad)
{
        u16 attr_id;
        u32 slot;
        u8 hi, lo;

        attr_id = be16_to_cpu(rq_mad->mad_hdr.attr_id);
        switch (attr_id) {
        case DM_ATTR_CLASS_PORT_INFO:
                srpt_get_class_port_info(rsp_mad);
                break;
        case DM_ATTR_IOU_INFO:
                srpt_get_iou(rsp_mad);
                break;
        case DM_ATTR_IOC_PROFILE:
                slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
                srpt_get_ioc(sp->sdev, slot, rsp_mad);
                break;
        case DM_ATTR_SVC_ENTRIES:
                slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
                hi = (u8) ((slot >> 8) & 0xff);
                lo = (u8) (slot & 0xff);
                slot = (u16) ((slot >> 16) & 0xffff);
                srpt_get_svc_entries(srpt_service_guid,
                                     slot, hi, lo, rsp_mad);
                break;
        default:
                rsp_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
                break;
        }
}
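
/*
 * Example (illustrative): for a DM_ATTR_SVC_ENTRIES request the 32-bit
 * attr_mod field packs three values: bits 31..16 hold the controller slot,
 * bits 15..8 the index of the last requested service entry ("hi") and bits
 * 7..0 the index of the first one ("lo"). An attr_mod of 0x00010100 thus
 * decodes to slot = 1, hi = 1 and lo = 0.
 */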

/*
 * Callback function that is called by the InfiniBand core after transmission
 * of a MAD. (MAD = management datagram; AH = address handle.)
 */
static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
                                  struct ib_mad_send_wc *mad_wc)
{
        ib_destroy_ah(mad_wc->send_buf->ah);
        ib_free_send_mad(mad_wc->send_buf);
}

/*
 * Callback function that is called by the InfiniBand core after reception of
 * a MAD (management datagram).
 */
static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
                                  struct ib_mad_recv_wc *mad_wc)
{
        struct srpt_port *sport = (struct srpt_port *)mad_agent->context;
        struct ib_ah *ah;
        struct ib_mad_send_buf *rsp;
        struct ib_dm_mad *dm_mad;

        if (!mad_wc || !mad_wc->recv_buf.mad)
                return;

        ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc,
                                  mad_wc->recv_buf.grh, mad_agent->port_num);
        if (IS_ERR(ah))
                goto err;

        BUILD_BUG_ON(offsetof(struct ib_dm_mad, data) != IB_MGMT_DEVICE_HDR);

        rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp,
                                 mad_wc->wc->pkey_index, 0,
                                 IB_MGMT_DEVICE_HDR, IB_MGMT_DEVICE_DATA,
                                 GFP_KERNEL);
        if (IS_ERR(rsp))
                goto err_rsp;

        rsp->ah = ah;

        dm_mad = rsp->mad;
        memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof *dm_mad);
        dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
        dm_mad->mad_hdr.status = 0;

        switch (mad_wc->recv_buf.mad->mad_hdr.method) {
        case IB_MGMT_METHOD_GET:
                srpt_mgmt_method_get(sport, mad_wc->recv_buf.mad, dm_mad);
                break;
        case IB_MGMT_METHOD_SET:
                dm_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
                break;
        default:
                dm_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD);
                break;
        }

        if (!ib_post_send_mad(rsp, NULL)) {
                ib_free_recv_mad(mad_wc);
                /* will destroy_ah & free_send_mad in send completion */
                return;
        }

        ib_free_send_mad(rsp);

err_rsp:
        ib_destroy_ah(ah);
err:
        ib_free_recv_mad(mad_wc);
}

/*
 * Enable InfiniBand management datagram processing, update the cached sm_lid,
 * lid and gid values, and register a callback function for processing MADs
 * on the specified port. It is safe to call this function more than once for
 * the same port.
 */
static int srpt_refresh_port(struct srpt_port *sport)
{
        struct ib_mad_reg_req reg_req;
        struct ib_port_modify port_modify;
        struct ib_port_attr port_attr;
        int ret;

        TRACE_ENTRY();

        memset(&port_modify, 0, sizeof port_modify);
        port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
        port_modify.clr_port_cap_mask = 0;

        ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
        if (ret)
                goto err_mod_port;

        ret = ib_query_port(sport->sdev->device, sport->port, &port_attr);
        if (ret)
                goto err_query_port;

        sport->sm_lid = port_attr.sm_lid;
        sport->lid = port_attr.lid;

        ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid);
        if (ret)
                goto err_query_port;

        if (!sport->mad_agent) {
                memset(&reg_req, 0, sizeof reg_req);
                reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
                reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION;
                set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
                set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);

                sport->mad_agent = ib_register_mad_agent(sport->sdev->device,
                                                         sport->port,
                                                         IB_QPT_GSI,
                                                         &reg_req, 0,
                                                         srpt_mad_send_handler,
                                                         srpt_mad_recv_handler,
                                                         sport);
                if (IS_ERR(sport->mad_agent)) {
                        ret = PTR_ERR(sport->mad_agent);
                        sport->mad_agent = NULL;
                        goto err_query_port;
                }
        }

        TRACE_EXIT_RES(0);

        return 0;

err_query_port:

        port_modify.set_port_cap_mask = 0;
        port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
        ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);

err_mod_port:

        TRACE_EXIT_RES(ret);

        return ret;
}

/*
 * Unregister the callback function for processing MADs and disable MAD
 * processing for all ports of the specified device. It is safe to call this
 * function more than once for the same device.
 */
static void srpt_unregister_mad_agent(struct srpt_device *sdev)
{
        struct ib_port_modify port_modify = {
                .clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP,
        };
        struct srpt_port *sport;
        int i;

        for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
                sport = &sdev->port[i - 1];
                WARN_ON(sport->port != i);
                if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0)
                        PRINT_ERROR("%s", "disabling MAD processing failed.");
                if (sport->mad_agent) {
                        ib_unregister_mad_agent(sport->mad_agent);
                        sport->mad_agent = NULL;
                }
        }
}

/**
 * Allocate and initialize an SRPT I/O context structure.
 */
static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev)
{
        struct srpt_ioctx *ioctx;

        ioctx = kmalloc(sizeof *ioctx, GFP_KERNEL);
        if (!ioctx)
                goto out;

        ioctx->buf = kzalloc(srp_max_message_size, GFP_KERNEL);
        if (!ioctx->buf)
                goto out_free_ioctx;

        ioctx->dma = ib_dma_map_single(sdev->device, ioctx->buf,
                                       srp_max_message_size, DMA_BIDIRECTIONAL);
        if (ib_dma_mapping_error(sdev->device, ioctx->dma))
                goto out_free_buf;

        return ioctx;

out_free_buf:
        kfree(ioctx->buf);
out_free_ioctx:
        kfree(ioctx);
out:
        return NULL;
}

/*
 * Deallocate an SRPT I/O context structure.
 */
static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
{
        if (!ioctx)
                return;

        ib_dma_unmap_single(sdev->device, ioctx->dma,
                            srp_max_message_size, DMA_BIDIRECTIONAL);
        kfree(ioctx->buf);
        kfree(ioctx);
}

/*
 * Associate a ring of SRPT I/O context structures with the specified device.
 */
static int srpt_alloc_ioctx_ring(struct srpt_device *sdev)
{
        int i;

        TRACE_ENTRY();

        for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
                sdev->ioctx_ring[i] = srpt_alloc_ioctx(sdev);

                if (!sdev->ioctx_ring[i])
                        goto err;

                sdev->ioctx_ring[i]->index = i;
        }

        TRACE_EXIT_RES(0);

        return 0;

err:
        while (--i >= 0) {
                srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
                sdev->ioctx_ring[i] = NULL;
        }
        TRACE_EXIT_RES(-ENOMEM);
        return -ENOMEM;
}

/* Free the ring of SRPT I/O context structures. */
static void srpt_free_ioctx_ring(struct srpt_device *sdev)
{
        int i;

        for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
                srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
                sdev->ioctx_ring[i] = NULL;
        }
}

/**
 * Set the state of a command.
 * @new: New state to be set.
 *
 * Does not modify the state of aborted commands. Returns the previous command
 * state.
 */
static enum srpt_command_state srpt_set_cmd_state(struct srpt_ioctx *ioctx,
                                                  enum srpt_command_state new)
{
        enum srpt_command_state previous;

        WARN_ON(!ioctx);
        WARN_ON(new == SRPT_STATE_NEW);

        do {
                previous = atomic_read(&ioctx->state);
        } while (previous != SRPT_STATE_ABORTED
               && atomic_cmpxchg(&ioctx->state, previous, new) != previous);

        return previous;
}
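
/*
 * Example (illustrative): SRPT_STATE_ABORTED is sticky. Once a command has
 * been aborted, srpt_set_cmd_state(ioctx, SRPT_STATE_PROCESSED) leaves the
 * state unchanged and returns SRPT_STATE_ABORTED, which lets callers detect
 * a concurrent abort without taking any lock.
 */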

/**
 * Test and set the state of a command.
 * @old: State to compare against.
 * @new: New state to be set if the current state matches 'old'.
 *
 * Returns the previous command state.
 */
static enum srpt_command_state
srpt_test_and_set_cmd_state(struct srpt_ioctx *ioctx,
                            enum srpt_command_state old,
                            enum srpt_command_state new)
{
        WARN_ON(!ioctx);
        WARN_ON(old == SRPT_STATE_ABORTED);
        WARN_ON(new == SRPT_STATE_NEW);

        return atomic_cmpxchg(&ioctx->state, old, new);
}

/**
 * Post a receive request on the SRQ (shared receive queue) of InfiniBand
 * device 'sdev'.
 */
static int srpt_post_recv(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
{
        struct ib_sge list;
        struct ib_recv_wr wr, *bad_wr;

        wr.wr_id = ioctx->index | SRPT_OP_RECV;

        list.addr = ioctx->dma;
        list.length = srp_max_message_size;
        list.lkey = sdev->mr->lkey;

        wr.next = NULL;
        wr.sg_list = &list;
        wr.num_sge = 1;

        return ib_post_srq_recv(sdev->srq, &wr, &bad_wr);
}

/**
 * Post an IB send request.
 * @ch: RDMA channel to post the send request on.
 * @ioctx: I/O context of the send request.
 * @len: length of the request to be sent in bytes.
 *
 * Returns zero upon success and a non-zero value upon failure.
 */
static int srpt_post_send(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
                          int len)
{
        struct ib_sge list;
        struct ib_send_wr wr, *bad_wr;
        struct srpt_device *sdev = ch->sport->sdev;
        int ret;

        ret = -ENOMEM;
        if (atomic_dec_return(&ch->qp_wr_avail) < 0) {
                PRINT_ERROR("%s[%d]: send queue full", __func__, __LINE__);
                goto out;
        }

        ib_dma_sync_single_for_device(sdev->device, ioctx->dma,
                                      len, DMA_TO_DEVICE);

        list.addr = ioctx->dma;
        list.length = len;
        list.lkey = sdev->mr->lkey;

        wr.next = NULL;
        wr.wr_id = ioctx->index;
        wr.sg_list = &list;
        wr.num_sge = 1;
        wr.opcode = IB_WR_SEND;
        wr.send_flags = IB_SEND_SIGNALED;

        ret = ib_post_send(ch->qp, &wr, &bad_wr);

out:
        if (ret < 0)
                atomic_inc(&ch->qp_wr_avail);
        return ret;
}
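
/*
 * Note on wr_id encoding (illustrative): receive requests are posted with
 * wr_id == ioctx->index | SRPT_OP_RECV while send requests use the bare
 * ioctx->index, so a completion handler can both classify a work completion
 * and recover its I/O context:
 *
 *   if (wc->wr_id & SRPT_OP_RECV)
 *           ioctx = sdev->ioctx_ring[wc->wr_id & ~SRPT_OP_RECV];
 *   else
 *           ioctx = sdev->ioctx_ring[wc->wr_id];
 */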

/**
 * srpt_get_desc_tbl() - Parse the data descriptors of an SRP_CMD request.
 * @ioctx: Pointer to the I/O context associated with the request.
 * @srp_cmd: Pointer to the SRP_CMD request data.
 * @dir: Pointer to the variable to which the transfer direction will be
 *   written.
 * @data_len: Pointer to the variable to which the total data length of all
 *   descriptors in the SRP_CMD request will be written.
 *
 * This function initializes ioctx->n_rbuf and ioctx->rbufs.
 *
 * Returns -EINVAL when the SRP_CMD request contains inconsistent descriptors;
 * -ENOMEM when memory allocation fails and zero upon success.
 */
static int srpt_get_desc_tbl(struct srpt_ioctx *ioctx, struct srp_cmd *srp_cmd,
                             scst_data_direction *dir, u64 *data_len)
{
        struct srp_indirect_buf *idb;
        struct srp_direct_buf *db;
        unsigned add_cdb_offset;
        int ret;

        /*
         * The pointer computations below will only be compiled correctly
         * if srp_cmd::add_data is declared as s8*, u8*, s8[] or u8[], so check
         * whether srp_cmd::add_data has been declared as a byte pointer.
         */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 31)
        BUILD_BUG_ON(!__same_type(srp_cmd->add_data[0], (s8)0)
                     && !__same_type(srp_cmd->add_data[0], (u8)0));
#else
        /* Note: the __same_type() macro has been introduced in kernel 2.6.31.*/
#endif

        BUG_ON(!dir);
        BUG_ON(!data_len);

        ret = 0;
        *data_len = 0;

        /*
         * The lower four bits of the buffer format field contain the DATA-IN
         * buffer descriptor format, and the highest four bits contain the
         * DATA-OUT buffer descriptor format.
         */
        *dir = SCST_DATA_NONE;
        if (srp_cmd->buf_fmt & 0xf)
                /* DATA-IN: transfer data from target to initiator. */
                *dir = SCST_DATA_READ;
        else if (srp_cmd->buf_fmt >> 4)
                /* DATA-OUT: transfer data from initiator to target. */
                *dir = SCST_DATA_WRITE;

        /*
         * According to the SRP spec, the lower two bits of the 'ADDITIONAL
         * CDB LENGTH' field are reserved and the size in bytes of this field
         * is four times the value specified in bits 3..7. Hence the "& ~3".
         */
        add_cdb_offset = srp_cmd->add_cdb_len & ~3;
        if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) ||
            ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) {
                ioctx->n_rbuf = 1;
                ioctx->rbufs = &ioctx->single_rbuf;

                db = (struct srp_direct_buf *)(srp_cmd->add_data
                                               + add_cdb_offset);
                memcpy(ioctx->rbufs, db, sizeof *db);
                *data_len = be32_to_cpu(db->len);
        } else if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_INDIRECT) ||
                   ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_INDIRECT)) {
                idb = (struct srp_indirect_buf *)(srp_cmd->add_data
                                                  + add_cdb_offset);

                ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof *db;

                if (ioctx->n_rbuf >
                    (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
                        PRINT_ERROR("received corrupt SRP_CMD request"
                                    " (%u out + %u in != %u / %zu)",
                                    srp_cmd->data_out_desc_cnt,
                                    srp_cmd->data_in_desc_cnt,
                                    be32_to_cpu(idb->table_desc.len),
                                    sizeof(*db));
                        ioctx->n_rbuf = 0;
                        ret = -EINVAL;
                        goto out;
                }

                if (ioctx->n_rbuf == 1)
                        ioctx->rbufs = &ioctx->single_rbuf;
                else {
                        ioctx->rbufs =
                                kmalloc(ioctx->n_rbuf * sizeof *db, GFP_ATOMIC);
                        if (!ioctx->rbufs) {
                                ioctx->n_rbuf = 0;
                                ret = -ENOMEM;
                                goto out;
                        }
                }

                db = idb->desc_list;
                memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof *db);
                *data_len = be32_to_cpu(idb->len);
        }
out:
        return ret;
}
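
/*
 * Example (illustrative): a WRITE command carrying one direct data
 * descriptor has buf_fmt == 0x10 (DATA-OUT format 1, "direct"); the code
 * above then sets *dir to SCST_DATA_WRITE, points ioctx->rbufs at
 * ioctx->single_rbuf and copies the single srp_direct_buf that follows the
 * CDB into it. A READ command with buf_fmt == 0x02 (DATA-IN format 2,
 * "indirect") takes the indirect branch instead, and n_rbuf is derived from
 * the descriptor table length.
 */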

/*
 * Modify the attributes of queue pair 'qp': allow local write, remote read,
 * and remote write. Also transition 'qp' to state IB_QPS_INIT.
 */
static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
{
        struct ib_qp_attr *attr;
        int ret;

        attr = kzalloc(sizeof *attr, GFP_KERNEL);
        if (!attr)
                return -ENOMEM;

        attr->qp_state = IB_QPS_INIT;
        attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
            IB_ACCESS_REMOTE_WRITE;
        attr->port_num = ch->sport->port;
        attr->pkey_index = 0;

        ret = ib_modify_qp(qp, attr,
                           IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PORT |
                           IB_QP_PKEY_INDEX);

        kfree(attr);
        return ret;
}

/**
 * Change the state of a channel to 'ready to receive' (RTR).
 * @ch: channel of the queue pair.
 * @qp: queue pair to change the state of.
 *
 * Returns zero upon success and a negative value upon failure.
 *
 * Note: currently a struct ib_qp_attr takes 136 bytes on a 64-bit system.
 * If this structure ever becomes larger, it might be necessary to allocate
 * it dynamically instead of on the stack.
 */
static int srpt_ch_qp_rtr(struct srpt_rdma_ch *ch, struct ib_qp *qp)
{
        struct ib_qp_attr qp_attr;
        int attr_mask;
        int ret;

        qp_attr.qp_state = IB_QPS_RTR;
        ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask);
        if (ret)
                goto out;

        qp_attr.max_dest_rd_atomic = 4;

        ret = ib_modify_qp(qp, &qp_attr, attr_mask);

out:
        return ret;
}

/**
 * Change the state of a channel to 'ready to send' (RTS).
 * @ch: channel of the queue pair.
 * @qp: queue pair to change the state of.
 *
 * Returns zero upon success and a negative value upon failure.
 *
 * Note: currently a struct ib_qp_attr takes 136 bytes on a 64-bit system.
 * If this structure ever becomes larger, it might be necessary to allocate
 * it dynamically instead of on the stack.
 */
static int srpt_ch_qp_rts(struct srpt_rdma_ch *ch, struct ib_qp *qp)
{
        struct ib_qp_attr qp_attr;
        int attr_mask;
        int ret;

        qp_attr.qp_state = IB_QPS_RTS;
        ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask);
        if (ret)
                goto out;

        qp_attr.max_rd_atomic = 4;

        ret = ib_modify_qp(qp, &qp_attr, attr_mask);

out:
        return ret;
}

static void srpt_reset_ioctx(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
{
        WARN_ON(!ch);
        if (!ch)
                return;

        srpt_unmap_sg_to_ib_sge(ch, ioctx);

        if (ioctx->n_rbuf > 1) {
                kfree(ioctx->rbufs);
                ioctx->rbufs = NULL;
        }

        /*
         * If posting fails, the ioctx should be queued back to a free_ioctx
         * queue.
         */
        if (srpt_post_recv(ch->sport->sdev, ioctx))
                PRINT_ERROR("%s", "SRQ post_recv failed - this is serious.");
        else
                atomic_inc(&ch->req_lim_delta);
}

/**
 * Abort a command.
 */
static void srpt_abort_scst_cmd(struct srpt_device *sdev,
                                struct scst_cmd *scmnd)
{
        struct srpt_ioctx *ioctx;
        scst_data_direction dir;
        enum srpt_command_state previous_state;

        TRACE_ENTRY();

        ioctx = scst_cmd_get_tgt_priv(scmnd);
        BUG_ON(!ioctx);

        previous_state = srpt_set_cmd_state(ioctx, SRPT_STATE_ABORTED);
        if (previous_state == SRPT_STATE_ABORTED)
                goto out;

        TRACE_DBG("Aborting cmd with state %d and tag %lld",
                  previous_state, scst_cmd_get_tag(scmnd));

        dir = scst_cmd_get_data_direction(scmnd);
        if (dir != SCST_DATA_NONE && scst_cmd_get_sg(scmnd))
                ib_dma_unmap_sg(sdev->device,
                                scst_cmd_get_sg(scmnd),
                                scst_cmd_get_sg_cnt(scmnd),
                                scst_to_tgt_dma_dir(dir));

        switch (previous_state) {
        case SRPT_STATE_NEW:
                break;
        case SRPT_STATE_NEED_DATA:
                WARN_ON(scst_cmd_get_data_direction(ioctx->scmnd)
                        == SCST_DATA_READ);
                scst_rx_data(scmnd,
                             SCST_RX_STATUS_ERROR,
                             SCST_CONTEXT_THREAD);
                break;
        case SRPT_STATE_DATA_IN:
        case SRPT_STATE_PROCESSED:
                scst_set_delivery_status(scmnd, SCST_CMD_DELIVERY_ABORTED);
                WARN_ON(scmnd->state != SCST_CMD_STATE_XMIT_WAIT);
                scst_tgt_cmd_done(scmnd, scst_estimate_context());
                break;
        default:
                TRACE_DBG("Aborting cmd with state %d", previous_state);
                WARN_ON("ERROR: unexpected command state");
        }

out:
        ;

        TRACE_EXIT();
}

static void srpt_handle_err_comp(struct srpt_rdma_ch *ch, struct ib_wc *wc)
{
        struct srpt_ioctx *ioctx;
        struct srpt_device *sdev = ch->sport->sdev;

        if (wc->wr_id & SRPT_OP_RECV) {
                ioctx = sdev->ioctx_ring[wc->wr_id & ~SRPT_OP_RECV];
                PRINT_ERROR("%s", "This is serious - SRQ is in bad state.");
        } else {
                ioctx = sdev->ioctx_ring[wc->wr_id];

                if (ioctx->scmnd)
                        srpt_abort_scst_cmd(sdev, ioctx->scmnd);
                else
                        srpt_reset_ioctx(ch, ioctx);
        }
}

/** Process an IB send completion notification. */
static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
                                  struct srpt_ioctx *ioctx,
                                  enum scst_exec_context context)
{
        if (ioctx->scmnd) {
                scst_data_direction dir =
                        scst_cmd_get_data_direction(ioctx->scmnd);

                if (dir != SCST_DATA_NONE && scst_cmd_get_sg(ioctx->scmnd))
                        ib_dma_unmap_sg(ch->sport->sdev->device,
                                        scst_cmd_get_sg(ioctx->scmnd),
                                        scst_cmd_get_sg_cnt(ioctx->scmnd),
                                        scst_to_tgt_dma_dir(dir));

                WARN_ON(ioctx->scmnd->state != SCST_CMD_STATE_XMIT_WAIT);
                scst_tgt_cmd_done(ioctx->scmnd, context);
        } else
                srpt_reset_ioctx(ch, ioctx);
}

/** Process an IB RDMA completion notification. */
static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
                                  struct srpt_ioctx *ioctx)
{
        if (!ioctx->scmnd) {
                WARN_ON("ERROR: ioctx->scmnd == NULL");
                srpt_reset_ioctx(ch, ioctx);
                return;
        }

        /*
         * If an RDMA completion notification has been received for a write
         * command, tell SCST that processing can continue by calling
         * scst_rx_data().
         */
        if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
                                SRPT_STATE_DATA_IN) == SRPT_STATE_NEED_DATA) {
                WARN_ON(scst_cmd_get_data_direction(ioctx->scmnd)
                        == SCST_DATA_READ);
                scst_rx_data(ioctx->scmnd, SCST_RX_STATUS_SUCCESS,
                             scst_estimate_context());
        }
}

/**
 * srpt_build_cmd_rsp() - Build an SRP_RSP response.
 * @ch: RDMA channel through which the request has been received.
 * @ioctx: I/O context associated with the SRP_CMD request. The response will
 *   be built in the buffer ioctx->buf points at and hence this function will
 *   overwrite the request data.
 * @tag: tag of the request for which this response is being generated.
 * @status: value for the STATUS field of the SRP_RSP information unit.
 * @sense_data: pointer to sense data to be included in the response.
 * @sense_data_len: length in bytes of the sense data.
 *
 * Returns the size in bytes of the SRP_RSP response.
 *
 * An SRP_RSP response contains a SCSI status or service response. See also
 * section 6.9 in the T10 SRP r16a document for the format of an SRP_RSP
 * response. See also SPC-2 for more information about sense data.
 */
static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
                              struct srpt_ioctx *ioctx, u64 tag, int status,
                              const u8 *sense_data, int sense_data_len)
{
        struct srp_rsp *srp_rsp;
        int limit_delta;
        int max_sense_len;

        /*
         * The lowest bit of all SAM-3 status codes is zero (see also
         * paragraph 5.3 in SAM-3).
         */
        WARN_ON(status & 1);

        srp_rsp = ioctx->buf;
        BUG_ON(!srp_rsp);
        memset(srp_rsp, 0, sizeof *srp_rsp);

        limit_delta = atomic_read(&ch->req_lim_delta);
        atomic_sub(limit_delta, &ch->req_lim_delta);

        srp_rsp->opcode = SRP_RSP;
        /*
         * Copy the SCSOLNT or UCSOLNT bit from the request to the SOLNT bit
         * of the response.
         */
        srp_rsp->sol_not
                = (ioctx->sol_not
                   & (status == SAM_STAT_GOOD ? SRP_SCSOLNT : SRP_UCSOLNT))
                ? SRP_SOLNT : 0;
        srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
        srp_rsp->tag = tag;

        if (SCST_SENSE_VALID(sense_data)) {
                BUILD_BUG_ON(MIN_MAX_MESSAGE_SIZE <= sizeof(*srp_rsp));
                max_sense_len = ch->max_ti_iu_len - sizeof(*srp_rsp);
                if (sense_data_len > max_sense_len) {
                        PRINT_WARNING("truncated sense data from %d to %d"
                                " bytes", sense_data_len,
                                max_sense_len);
                        sense_data_len = max_sense_len;
                }

                srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
                srp_rsp->status = status;
                srp_rsp->sense_data_len = cpu_to_be32(sense_data_len);
                memcpy(srp_rsp + 1, sense_data, sense_data_len);
        } else
                sense_data_len = 0;

        return sizeof(*srp_rsp) + sense_data_len;
}
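
/*
 * Example (illustrative): if the initiator set the SCSOLNT bit in the
 * SRP_CMD request and the command completed with SAM_STAT_GOOD, the
 * expression above yields SRP_SOLNT, i.e. the initiator asked to be notified
 * of successful completions. For any other status the UCSOLNT bit of the
 * request is consulted instead.
 */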

/**
 * Build a task management response, which is a specific SRP_RSP response.
 * @ch: RDMA channel through which the request has been received.
 * @ioctx: I/O context in which the SRP_RSP response will be built.
 * @rsp_code: RSP_CODE that will be stored in the response.
 * @tag: tag of the request for which this response is being generated.
 *
 * Returns the size in bytes of the SRP_RSP response.
 *
 * An SRP_RSP response contains a SCSI status or service response. See also
 * section 6.9 in the T10 SRP r16a document for the format of an SRP_RSP
 * response.
 */
static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
                                  struct srpt_ioctx *ioctx, u8 rsp_code,
                                  u64 tag)
{
        struct srp_rsp *srp_rsp;
        int limit_delta;
        int resp_data_len;
        int resp_len;

        resp_data_len = (rsp_code == SRP_TSK_MGMT_SUCCESS) ? 0 : 4;
        resp_len = sizeof(*srp_rsp) + resp_data_len;

        srp_rsp = ioctx->buf;
        memset(srp_rsp, 0, sizeof *srp_rsp);

        limit_delta = atomic_read(&ch->req_lim_delta);
        atomic_sub(limit_delta, &ch->req_lim_delta);

        srp_rsp->opcode = SRP_RSP;
        /*
         * Copy the SCSOLNT or UCSOLNT bit from the request to the SOLNT bit
         * of the response.
         */
        srp_rsp->sol_not
                = (ioctx->sol_not
                   & (rsp_code == SRP_TSK_MGMT_SUCCESS
                      ? SRP_SCSOLNT : SRP_UCSOLNT))
                ? SRP_SOLNT : 0;
        srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
        srp_rsp->tag = tag;

        if (rsp_code != SRP_TSK_MGMT_SUCCESS) {
                srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID;
                srp_rsp->resp_data_len = cpu_to_be32(resp_data_len);
                srp_rsp->data[3] = rsp_code;
        }

        return resp_len;
}
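
/*
 * Example (illustrative): for a failed task management function the response
 * carries four bytes of response data of which only the last byte is used;
 * rsp_code SRP_TSK_MGMT_FAILED results in resp_data_len == 4 and
 * srp_rsp->data[] == { 0, 0, 0, SRP_TSK_MGMT_FAILED }.
 */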

/*
 * Process SRP_CMD.
 */
static int srpt_handle_cmd(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
{
        struct scst_cmd *scmnd;
        struct srp_cmd *srp_cmd;
        scst_data_direction dir;
        u64 data_len;
        int ret;

        srp_cmd = ioctx->buf;

        scmnd = scst_rx_cmd(ch->scst_sess, (u8 *) &srp_cmd->lun,
                            sizeof srp_cmd->lun, srp_cmd->cdb, 16,
                            thread ? SCST_NON_ATOMIC : SCST_ATOMIC);
        if (!scmnd)
                goto err;

        ioctx->scmnd = scmnd;

        ret = srpt_get_desc_tbl(ioctx, srp_cmd, &dir, &data_len);
        if (ret) {
                scst_set_cmd_error(scmnd,
                        SCST_LOAD_SENSE(scst_sense_invalid_field_in_cdb));
                goto err;
        }

        switch (srp_cmd->task_attr) {
        case SRP_CMD_HEAD_OF_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_HEAD_OF_QUEUE;
                break;
        case SRP_CMD_ORDERED_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
                break;
        case SRP_CMD_SIMPLE_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_SIMPLE;
                break;
        case SRP_CMD_ACA:
                scmnd->queue_type = SCST_CMD_QUEUE_ACA;
                break;
        default:
                scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
                break;
        }

        scst_cmd_set_tag(scmnd, srp_cmd->tag);
        scst_cmd_set_tgt_priv(scmnd, ioctx);
        scst_cmd_set_expected(scmnd, dir, data_len);
        scst_cmd_init_done(scmnd, scst_estimate_context());

        return 0;

err:
        return -1;
}

/*
 * srpt_handle_tsk_mgmt() - Process an SRP_TSK_MGMT information unit.
 *
 * Returns SRP_TSK_MGMT_SUCCESS upon success.
 *
 * Each task management function is performed by calling one of the
 * scst_rx_mgmt_fn*() functions. These functions will either report failure
 * or process the task management function asynchronously. The function
 * srpt_tsk_mgmt_done() will be called by the SCST core upon completion of the
 * task management function. When srpt_handle_tsk_mgmt() fails, i.e. returns
 * a value other than SRP_TSK_MGMT_SUCCESS, the caller must build an SRP_RSP
 * response in ioctx->buf and send it back to the initiator.
 *
 * For more information about SRP_TSK_MGMT information units, see also section
 * 6.7 in the T10 SRP r16a document.
 */
static u8 srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
                               struct srpt_ioctx *ioctx)
{
        struct srp_tsk_mgmt *srp_tsk;
        struct srpt_mgmt_ioctx *mgmt_ioctx;
        int ret;
        u8 srp_tsk_mgmt_status;

        srp_tsk = ioctx->buf;

        TRACE_DBG("recv_tsk_mgmt= %d for task_tag= %lld"
                  " using tag= %lld cm_id= %p sess= %p",
                  srp_tsk->tsk_mgmt_func,
                  (unsigned long long) srp_tsk->task_tag,
                  (unsigned long long) srp_tsk->tag,
                  ch->cm_id, ch->scst_sess);

        srp_tsk_mgmt_status = SRP_TSK_MGMT_FAILED;
        mgmt_ioctx = kmalloc(sizeof *mgmt_ioctx, GFP_ATOMIC);
        if (!mgmt_ioctx)
                goto err;

        mgmt_ioctx->ioctx = ioctx;
        mgmt_ioctx->ch = ch;
        mgmt_ioctx->tag = srp_tsk->tag;

        switch (srp_tsk->tsk_mgmt_func) {
        case SRP_TSK_ABORT_TASK:
                TRACE_DBG("%s", "Processing SRP_TSK_ABORT_TASK");
                ret = scst_rx_mgmt_fn_tag(ch->scst_sess,
                                          SCST_ABORT_TASK,
                                          srp_tsk->task_tag,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_ABORT_TASK_SET:
                TRACE_DBG("%s", "Processing SRP_TSK_ABORT_TASK_SET");
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_ABORT_TASK_SET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_CLEAR_TASK_SET:
                TRACE_DBG("%s", "Processing SRP_TSK_CLEAR_TASK_SET");
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_CLEAR_TASK_SET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_LUN_RESET:
                TRACE_DBG("%s", "Processing SRP_TSK_LUN_RESET");
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_LUN_RESET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_CLEAR_ACA:
                TRACE_DBG("%s", "Processing SRP_TSK_CLEAR_ACA");
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_CLEAR_ACA,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        default:
                TRACE_DBG("%s", "Unsupported task management function.");
                srp_tsk_mgmt_status = SRP_TSK_MGMT_FUNC_NOT_SUPP;
                goto err;
        }

        if (ret) {
                TRACE_DBG("Processing task management function failed"
                          " (ret = %d).", ret);
                goto err;
        }
        return SRP_TSK_MGMT_SUCCESS;

err:
        kfree(mgmt_ioctx);
        return srp_tsk_mgmt_status;
}

/**
 * set_sense() - A copy of the function with the same name in
 * scst/src/common.c.
 */
static int set_sense(uint8_t *buffer, int len, int key, int asc, int ascq)
{
        int res = 18;

        EXTRACHECKS_BUG_ON(len < res);

        memset(buffer, 0, res);

        buffer[0] = 0x70;       /* Error Code                   */
        buffer[2] = key;        /* Sense Key                    */
        buffer[7] = 0x0a;       /* Additional Sense Length      */
        buffer[12] = asc;       /* ASC                          */
        buffer[13] = ascq;      /* ASCQ                         */

        TRACE_BUFFER("Sense set", buffer, res);
        return res;
}
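
/*
 * Example (illustrative): set_sense(buf, 18, 0x05, 0x24, 0x00) builds the
 * fixed-format sense data for ILLEGAL REQUEST / INVALID FIELD IN CDB:
 * buf[0] == 0x70 (current error, fixed format), buf[2] == 0x05 (sense key),
 * buf[12] == 0x24 (ASC) and buf[13] == 0x00 (ASCQ); the return value 18 is
 * the number of valid sense bytes.
 */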

/**
 * Process a newly received information unit.
 * @ch: RDMA channel through which the information unit has been received.
 * @ioctx: SRPT I/O context associated with the information unit.
 */
static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
                               struct srpt_ioctx *ioctx)
{
        struct srp_cmd *srp_cmd;
        enum rdma_ch_state ch_state;
        u8 srp_response_status;
        u8 srp_tsk_mgmt_status;
        int len;

        /*
         * A quote from SAM-3, paragraph 4.9.6: "Any command that is not
         * relayed to a dependent logical unit shall be terminated with a
         * CHECK CONDITION status. The sense key shall be set to ILLEGAL
         * REQUEST and the additional sense code shall be set to INVALID
         * COMMAND OPERATION CODE. If a task management function cannot be
         * relayed to a dependent logical unit, a service response of SERVICE
         * DELIVERY OR TARGET FAILURE shall be returned."
         */

        srp_response_status = SAM_STAT_CHECK_CONDITION;
        /* To keep the compiler happy. */
        srp_tsk_mgmt_status = -1;

        ch_state = atomic_read(&ch->state);
        if (ch_state == RDMA_CHANNEL_CONNECTING) {
                list_add_tail(&ioctx->wait_list, &ch->cmd_wait_list);
                return;
        } else if (ch_state == RDMA_CHANNEL_DISCONNECTING) {
                srpt_reset_ioctx(ch, ioctx);
                return;
        }

        WARN_ON(ch_state != RDMA_CHANNEL_LIVE);

        ib_dma_sync_single_for_cpu(ch->sport->sdev->device,
                                   ioctx->dma, srp_max_message_size,
                                   DMA_FROM_DEVICE);

        srp_cmd = ioctx->buf;

        ioctx->n_rbuf = 0;
        ioctx->rbufs = NULL;
        ioctx->n_rdma = 0;
        ioctx->n_rdma_ius = 0;
        ioctx->rdma_ius = NULL;
        ioctx->scmnd = NULL;
        ioctx->ch = ch;
        ioctx->sol_not = srp_cmd->sol_not;
        atomic_set(&ioctx->state, SRPT_STATE_NEW);

        switch (srp_cmd->opcode) {
        case SRP_CMD:
                if (srpt_handle_cmd(ch, ioctx) < 0) {
                        if (ioctx->scmnd)
                                srp_response_status =
                                        scst_cmd_get_status(ioctx->scmnd);
                        goto err;
                }
                break;

        case SRP_TSK_MGMT:
                srp_tsk_mgmt_status = srpt_handle_tsk_mgmt(ch, ioctx);
                if (srp_tsk_mgmt_status != SRP_TSK_MGMT_SUCCESS)
                        goto err;
                break;

        case SRP_I_LOGOUT:
        case SRP_AER_REQ:
        default:
                goto err;
        }

        return;

err:
        ch_state = atomic_read(&ch->state);
        if (ch_state != RDMA_CHANNEL_LIVE) {
                /* Give up if another thread modified the channel state. */
                PRINT_ERROR("%s: channel is in state %d", __func__, ch_state);
                srpt_reset_ioctx(ch, ioctx);
        } else {
                if (srp_cmd->opcode == SRP_TSK_MGMT) {
                        len = srpt_build_tskmgmt_rsp(ch, ioctx,
                                     srp_tsk_mgmt_status,
                                     ((struct srp_tsk_mgmt *)srp_cmd)->tag);
                } else if (ioctx->scmnd)
                        len = srpt_build_cmd_rsp(ch, ioctx, srp_cmd->tag,
                                srp_response_status,
1548                                 scst_cmd_get_sense_buffer(ioctx->scmnd),
1549                                 scst_cmd_get_sense_buffer_len(ioctx->scmnd));
1550                 else {
1551                         u8 sense_buf[18];
1552                         int sense_len;
1553
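                        /*
                         * Note: scst_sense_invalid_field_in_cdb is a
                         * comma-separated (key, asc, ascq) triple defined in
                         * the SCST headers, so the call below passes all five
                         * arguments to set_sense(); presumably it expands to
                         * set_sense(sense_buf, 18, ILLEGAL_REQUEST, 0x24, 0x00).
                         */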
1554                         sense_len = set_sense(sense_buf,
1555                                               ARRAY_SIZE(sense_buf),
1556                                               scst_sense_invalid_field_in_cdb);
1557                         len = srpt_build_cmd_rsp(ch, ioctx, srp_cmd->tag,
1558                                                  srp_response_status,
1559                                                  sense_buf, sense_len);
1560                 }
1561                 if (srpt_post_send(ch, ioctx, len)) {
1562                         PRINT_ERROR("%s: sending SRP_RSP response failed",
1563                                     __func__);
1564                         srpt_reset_ioctx(ch, ioctx);
1565                 }
1566         }
1567 }
1568
1569 /*
1570  * Returns true if the ioctx list is non-empty or if the ib_srpt kernel thread
1571  * should stop.
1572  * @pre thread != 0
1573  */
1574 static inline int srpt_test_ioctx_list(void)
1575 {
1576         int res = (!list_empty(&srpt_thread.thread_ioctx_list) ||
1577                    unlikely(kthread_should_stop()));
1578         return res;
1579 }
1580
1581 /*
1582  * Add 'ioctx' to the tail of the ioctx list and wake up the kernel thread.
1583  *
1584  * @pre thread != 0
1585  */
1586 static inline void srpt_schedule_thread(struct srpt_ioctx *ioctx)
1587 {
1588         unsigned long flags;
1589
1590         spin_lock_irqsave(&srpt_thread.thread_lock, flags);
1591         list_add_tail(&ioctx->comp_list, &srpt_thread.thread_ioctx_list);
1592         spin_unlock_irqrestore(&srpt_thread.thread_lock, flags);
1593         wake_up(&ioctx_list_waitQ);
1594 }
1595
1596 /**
1597  * InfiniBand completion queue callback function.
1598  * @cq: completion queue.
1599  * @ctx: completion queue context, which was passed as the fourth argument of
1600  *       the function ib_create_cq().
1601  */
1602 static void srpt_completion(struct ib_cq *cq, void *ctx)
1603 {
1604         struct srpt_rdma_ch *ch = ctx;
1605         struct srpt_device *sdev = ch->sport->sdev;
1606         struct ib_wc wc;
1607         struct srpt_ioctx *ioctx;
1608
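        /*
         * Re-arm the completion queue before draining it: a completion that
         * arrives while the poll loop below is running will then trigger
         * another invocation of this callback instead of being missed.
         */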
1609         ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
1610         while (ib_poll_cq(ch->cq, 1, &wc) > 0) {
1611                 if (wc.status) {
1612                         PRINT_ERROR("failed %s status= %d",
1613                                wc.wr_id & SRPT_OP_RECV ? "receive" : "send",
1614                                wc.status);
1615                         srpt_handle_err_comp(ch, &wc);
1616                         break;
1617                 }
1618
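                /*
                 * wr_id encodes both the completion type and the index of the
                 * I/O context in sdev->ioctx_ring: receive work requests have
                 * the SRPT_OP_RECV flag set on top of the ring index, while
                 * send and RDMA work requests carry the bare index.
                 */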
1619                 if (wc.wr_id & SRPT_OP_RECV) {
1620                         ioctx = sdev->ioctx_ring[wc.wr_id & ~SRPT_OP_RECV];
1621                         if (thread) {
1622                                 ioctx->ch = ch;
1623                                 ioctx->op = IB_WC_RECV;
1624                                 srpt_schedule_thread(ioctx);
1625                         } else
1626                                 srpt_handle_new_iu(ch, ioctx);
1627                         continue;
1628                 } else {
1629                         ioctx = sdev->ioctx_ring[wc.wr_id];
1630                         if (wc.opcode == IB_WC_SEND)
1631                                 atomic_inc(&ch->qp_wr_avail);
1632                         else {
1633                                 WARN_ON(wc.opcode != IB_WC_RDMA_READ);
1634                                 WARN_ON(ioctx->n_rdma <= 0);
1635                                 atomic_add(ioctx->n_rdma,
1636                                            &ch->qp_wr_avail);
1637                         }
1638                 }
1639
1640                 if (thread) {
1641                         ioctx->ch = ch;
1642                         ioctx->op = wc.opcode;
1643                         srpt_schedule_thread(ioctx);
1644                 } else {
1645                         switch (wc.opcode) {
1646                         case IB_WC_SEND:
1647                                 srpt_handle_send_comp(ch, ioctx,
1648                                         scst_estimate_context());
1649                                 break;
1650                         case IB_WC_RDMA_WRITE:
1651                         case IB_WC_RDMA_READ:
1652                                 srpt_handle_rdma_comp(ch, ioctx);
1653                                 break;
1654                         default:
1655                                 break;
1656                         }
1657                 }
1658
1659 #if defined(CONFIG_SCST_DEBUG)
1660                 if (interrupt_processing_delay_in_us <= MAX_UDELAY_MS * 1000)
1661                         udelay(interrupt_processing_delay_in_us);
1662 #endif
1663         }
1664 }
1665
1666 /*
1667  * Create a completion queue and a queue pair for the specified RDMA channel.
1668  */
1669 static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
1670 {
1671         struct ib_qp_init_attr *qp_init;
1672         struct srpt_device *sdev = ch->sport->sdev;
1673         int cqe;
1674         int ret;
1675
1676         qp_init = kzalloc(sizeof *qp_init, GFP_KERNEL);
1677         if (!qp_init)
1678                 return -ENOMEM;
1679
1680         /* Create a completion queue (CQ). */
1681
1682         cqe = SRPT_RQ_SIZE + SRPT_SQ_SIZE - 1;
1683 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(RHEL_RELEASE_CODE)
1684         ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe);
1685 #else
1686         ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe, 0);
1687 #endif
1688         if (IS_ERR(ch->cq)) {
1689                 ret = PTR_ERR(ch->cq);
1690                 PRINT_ERROR("failed to create_cq cqe= %d ret= %d", cqe, ret);
1691                 goto out;
1692         }
1693
1694         /* Request completion notification. */
1695
1696         ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
1697
1698         /* Create a queue pair (QP). */
1699
1700         qp_init->qp_context = (void *)ch;
1701         qp_init->event_handler
1702                 = (void(*)(struct ib_event *, void*))srpt_qp_event;
1703         qp_init->send_cq = ch->cq;
1704         qp_init->recv_cq = ch->cq;
1705         qp_init->srq = sdev->srq;
1706         qp_init->sq_sig_type = IB_SIGNAL_REQ_WR;
1707         qp_init->qp_type = IB_QPT_RC;
1708         qp_init->cap.max_send_wr = SRPT_SQ_SIZE;
1709         qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE;
1710
1711         ch->qp = ib_create_qp(sdev->pd, qp_init);
1712         if (IS_ERR(ch->qp)) {
1713                 ret = PTR_ERR(ch->qp);
1714                 ib_destroy_cq(ch->cq);
1715                 PRINT_ERROR("failed to create_qp ret= %d", ret);
1716                 goto out;
1717         }
1718
1719         atomic_set(&ch->qp_wr_avail, qp_init->cap.max_send_wr);
1720
1721         TRACE_DBG("%s: max_cqe= %d max_sge= %d cm_id= %p",
1722                __func__, ch->cq->cqe, qp_init->cap.max_send_sge,
1723                ch->cm_id);
1724
1725         /* Modify the attributes and the state of queue pair ch->qp. */
1726
1727         ret = srpt_init_ch_qp(ch, ch->qp);
1728         if (ret) {
1729                 ib_destroy_qp(ch->qp);
1730                 ib_destroy_cq(ch->cq);
1731                 goto out;
1732         }
1733
1734         atomic_set(&ch->req_lim_delta, SRPT_RQ_SIZE);
1735 out:
1736         kfree(qp_init);
1737         return ret;
1738 }
1739
1740 /**
1741  * Release the channel corresponding to the specified cm_id.
1742  *
1743  * Note: must be called from inside srpt_cm_handler to avoid a race between
1744  * accessing sdev->spinlock and the call to kfree(sdev) in srpt_remove_one()
1745  * (the caller of srpt_cm_handler holds the cm_id spinlock;
1746  * srpt_remove_one() waits until all SCST sessions for the associated
1747  * IB device have been unregistered and SCST session unregistration involves
1748  * a call to ib_destroy_cm_id(), which locks the cm_id spinlock and hence
1749  * waits until this function has finished).
1750  */
1751 static void srpt_release_channel_by_cmid(struct ib_cm_id *cm_id)
1752 {
1753         struct srpt_device *sdev;
1754         struct srpt_rdma_ch *ch;
1755
1756         sdev = cm_id->context;
1757         BUG_ON(!sdev);
1758         spin_lock_irq(&sdev->spinlock);
1759         list_for_each_entry(ch, &sdev->rch_list, list) {
1760                 if (ch->cm_id == cm_id) {
1761                         list_del(&ch->list);
1762                         atomic_set(&ch->state, RDMA_CHANNEL_DISCONNECTING);
1763                         scst_unregister_session(ch->scst_sess, 0,
1764                                                 srpt_release_channel);
1765                         break;
1766                 }
1767         }
1768         spin_unlock_irq(&sdev->spinlock);
1769 }
1770
1771 /**
1772  * Look up the RDMA channel that corresponds to the specified cm_id.
1773  *
1774  * Return NULL if no matching RDMA channel has been found.
1775  */
1776 static struct srpt_rdma_ch *srpt_find_channel(struct srpt_device *sdev,
1777                                               struct ib_cm_id *cm_id)
1778 {
1779         struct srpt_rdma_ch *ch;
1780
1781         BUG_ON(!sdev);
1782         ch = NULL;
1783         spin_lock_irq(&sdev->spinlock);
1784         list_for_each_entry(ch, &sdev->rch_list, list)
1785                 if (ch->cm_id == cm_id)
1786                         break;
1787         spin_unlock_irq(&sdev->spinlock);
1788
1789         return ch;
1790 }
1791
1792 /**
1793  * Release all resources associated with an RDMA channel.
1794  *
1795  * Notes:
1796  * - The caller must have removed the channel from the channel list before
1797  *   calling this function.
1798  * - Must be called as a callback function via scst_unregister_session(). Never
1799  *   call this function directly because doing so would trigger several race
1800  *   conditions.
1801  */
1802 static void srpt_release_channel(struct scst_session *scst_sess)
1803 {
1804         struct srpt_rdma_ch *ch;
1805
1806         TRACE_ENTRY();
1807
1808         ch = scst_sess_get_tgt_priv(scst_sess);
1809         BUG_ON(!ch);
1810         WARN_ON(srpt_find_channel(ch->sport->sdev, ch->cm_id) == ch);
1811
1812         WARN_ON(atomic_read(&ch->state) != RDMA_CHANNEL_DISCONNECTING);
1813
1814         TRACE_DBG("destroying cm_id %p", ch->cm_id);
1815         BUG_ON(!ch->cm_id);
1816         ib_destroy_cm_id(ch->cm_id);
1817
1818         ib_destroy_qp(ch->qp);
1819         ib_destroy_cq(ch->cq);
1820         kfree(ch);
1821
1822         TRACE_EXIT();
1823 }
1824
1825 /**
1826  * Process the event IB_CM_REQ_RECEIVED.
1827  *
1828  * Ownership of the cm_id is transferred to the SCST session if this function
1829  * returns zero. Otherwise the caller remains the owner of cm_id.
1830  */
1831 static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
1832                             struct ib_cm_req_event_param *param,
1833                             void *private_data)
1834 {
1835         struct srpt_device *sdev = cm_id->context;
1836         struct srp_login_req *req;
1837         struct srp_login_rsp *rsp;
1838         struct srp_login_rej *rej;
1839         struct ib_cm_rep_param *rep_param;
1840         struct srpt_rdma_ch *ch, *tmp_ch;
1841         u32 it_iu_len;
1842         int ret = 0;
1843
1844 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
1845         WARN_ON(!sdev || !private_data);
1846         if (!sdev || !private_data)
1847                 return -EINVAL;
1848 #else
1849         if (WARN_ON(!sdev || !private_data))
1850                 return -EINVAL;
1851 #endif
1852
1853         req = (struct srp_login_req *)private_data;
1854
1855         it_iu_len = be32_to_cpu(req->req_it_iu_len);
1856
1857         PRINT_INFO("Received SRP_LOGIN_REQ with"
1858             " i_port_id 0x%llx:0x%llx, t_port_id 0x%llx:0x%llx and it_iu_len %d"
1859             " on port %d (guid=0x%llx:0x%llx)",
1860             (unsigned long long)be64_to_cpu(*(u64 *)&req->initiator_port_id[0]),
1861             (unsigned long long)be64_to_cpu(*(u64 *)&req->initiator_port_id[8]),
1862             (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[0]),
1863             (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[8]),
1864             it_iu_len,
1865             param->port,
1866             (unsigned long long)be64_to_cpu(*(u64 *)
1867                                 &sdev->port[param->port - 1].gid.raw[0]),
1868             (unsigned long long)be64_to_cpu(*(u64 *)
1869                                 &sdev->port[param->port - 1].gid.raw[8]));
1870
1871         rsp = kzalloc(sizeof *rsp, GFP_KERNEL);
1872         rej = kzalloc(sizeof *rej, GFP_KERNEL);
1873         rep_param = kzalloc(sizeof *rep_param, GFP_KERNEL);
1874
1875         if (!rsp || !rej || !rep_param) {
1876                 ret = -ENOMEM;
1877                 goto out;
1878         }
1879
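        /*
         * Validate the initiator-to-target IU length. The "too large" reject
         * reason is used for both bounds since SRP apparently defines no
         * separate reason code for a too-small IT IU length.
         */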
1880         if (it_iu_len > srp_max_message_size || it_iu_len < 64) {
1881                 rej->reason =
1882                     cpu_to_be32(SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
1883                 ret = -EINVAL;
1884                 PRINT_ERROR("rejected SRP_LOGIN_REQ because its"
1885                             " length (%d bytes) is out of range (%d .. %d)",
1886                             it_iu_len, 64, srp_max_message_size);
1887                 goto reject;
1888         }
1889
1890         if ((req->req_flags & 0x3) == SRP_MULTICHAN_SINGLE) {
1891                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;
1892
1893                 spin_lock_irq(&sdev->spinlock);
1894
1895                 list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
1896                         if (!memcmp(ch->i_port_id, req->initiator_port_id, 16)
1897                             && !memcmp(ch->t_port_id, req->target_port_id, 16)
1898                             && param->port == ch->sport->port
1899                             && param->listen_id == ch->sport->sdev->cm_id
1900                             && ch->cm_id) {
1901                                 enum rdma_ch_state prev_state;
1902
1903                                 /* found an existing channel */
1904                                 TRACE_DBG("Found existing channel name= %s"
1905                                           " cm_id= %p state= %d",
1906                                           ch->sess_name, ch->cm_id,
1907                                           atomic_read(&ch->state));
1908
1909                                 prev_state = atomic_xchg(&ch->state,
1910                                                 RDMA_CHANNEL_DISCONNECTING);
1911                                 if (prev_state == RDMA_CHANNEL_CONNECTING)
1912                                         list_del(&ch->list);
1913
1914                                 spin_unlock_irq(&sdev->spinlock);
1915
1916                                 rsp->rsp_flags =
1917                                         SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
1918
1919                                 if (prev_state == RDMA_CHANNEL_LIVE) {
1920                                         ib_send_cm_dreq(ch->cm_id, NULL, 0);
1921                                         PRINT_INFO("disconnected"
1922                                           " session %s because a new"
1923                                           " SRP_LOGIN_REQ has been received.",
1924                                           ch->sess_name);
1925                                 } else if (prev_state ==
1926                                          RDMA_CHANNEL_CONNECTING) {
1927                                         PRINT_ERROR("%s", "rejected"
1928                                           " SRP_LOGIN_REQ because another login"
1929                                           " request is being processed.");
1930                                         ib_send_cm_rej(ch->cm_id,
1931                                                        IB_CM_REJ_NO_RESOURCES,
1932                                                        NULL, 0, NULL, 0);
1933                                         scst_unregister_session(ch->scst_sess,
1934                                                         0,
1935                                                         srpt_release_channel);
1936                                 }
1937
1938                                 spin_lock_irq(&sdev->spinlock);
1939                         }
1940                 }
1941
1942                 spin_unlock_irq(&sdev->spinlock);
1943
1944         } else
1945                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
1946
1947         if (((u64) (*(u64 *) req->target_port_id) !=
1948              cpu_to_be64(srpt_service_guid)) ||
1949             ((u64) (*(u64 *) (req->target_port_id + 8)) !=
1950              cpu_to_be64(srpt_service_guid))) {
1951                 rej->reason =
1952                     cpu_to_be32(SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
1953                 ret = -ENOMEM;
1954                 PRINT_ERROR("%s", "rejected SRP_LOGIN_REQ because it"
1955                        " has an invalid target port identifier.");
1956                 goto reject;
1957         }
1958
1959         ch = kzalloc(sizeof *ch, GFP_KERNEL);
1960         if (!ch) {
1961                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1962                 PRINT_ERROR("%s",
1963                             "rejected SRP_LOGIN_REQ because out of memory.");
1964                 ret = -ENOMEM;
1965                 goto reject;
1966         }
1967
1968         memcpy(ch->i_port_id, req->initiator_port_id, 16);
1969         memcpy(ch->t_port_id, req->target_port_id, 16);
1970         ch->sport = &sdev->port[param->port - 1];
1971         ch->cm_id = cm_id;
1972         atomic_set(&ch->state, RDMA_CHANNEL_CONNECTING);
1973         INIT_LIST_HEAD(&ch->cmd_wait_list);
1974
1975         ret = srpt_create_ch_ib(ch);
1976         if (ret) {
1977                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1978                 PRINT_ERROR("%s", "rejected SRP_LOGIN_REQ because creating"
1979                             " a new RDMA channel failed.");
1980                 goto free_ch;
1981         }
1982
1983         ret = srpt_ch_qp_rtr(ch, ch->qp);
1984         if (ret) {
1985                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1986                 PRINT_ERROR("rejected SRP_LOGIN_REQ because enabling"
1987                        " RTR failed (error code = %d)", ret);
1988                 goto destroy_ib;
1989         }
1990
1991         if (use_port_guid_in_session_name) {
1992                 /*
1993                  * If the kernel module parameter use_port_guid_in_session_name
1994                  * has been specified, use a combination of the target port
1995                  * GUID and the initiator port ID as the session name. This
1996                  * was the original behavior of the SRP target implementation
1997                  * (i.e. before the SRPT was included in OFED 1.3).
1998                  */
1999                 snprintf(ch->sess_name, sizeof(ch->sess_name),
2000                          "0x%016llx%016llx",
2001                          (unsigned long long)be64_to_cpu(*(u64 *)
2002                                 &sdev->port[param->port - 1].gid.raw[8]),
2003                          (unsigned long long)be64_to_cpu(*(u64 *)
2004                                 (ch->i_port_id + 8)));
2005         } else {
2006                 /*
2007                  * Default behavior: use the initiator port identifier as the
2008                  * session name.
2009                  */
2010                 snprintf(ch->sess_name, sizeof(ch->sess_name),
2011                          "0x%016llx%016llx",
2012                          (unsigned long long)be64_to_cpu(*(u64 *)ch->i_port_id),
2013                          (unsigned long long)be64_to_cpu(*(u64 *)
2014                                  (ch->i_port_id + 8)));
2015         }
2016
2017         TRACE_DBG("registering session %s", ch->sess_name);
2018
2019         BUG_ON(!sdev->scst_tgt);
2020         ch->scst_sess = scst_register_session(sdev->scst_tgt, 0, ch->sess_name,
2021                                               NULL, NULL);
2022         if (!ch->scst_sess) {
2023                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2024                 TRACE_DBG("%s", "Failed to create scst sess");
2025                 goto destroy_ib;
2026         }
2027
2028         TRACE_DBG("Establish connection sess=%p name=%s cm_id=%p",
2029                   ch->scst_sess, ch->sess_name, ch->cm_id);
2030
2031         scst_sess_set_tgt_priv(ch->scst_sess, ch);
2032
2033         /* create srp_login_response */
2034         rsp->opcode = SRP_LOGIN_RSP;
2035         rsp->tag = req->tag;
2036         rsp->max_it_iu_len = req->req_it_iu_len;
2037         rsp->max_ti_iu_len = req->req_it_iu_len;
2038         ch->max_ti_iu_len = req->req_it_iu_len;
2039         rsp->buf_fmt =
2040             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
2041         rsp->req_lim_delta = cpu_to_be32(SRPT_RQ_SIZE);
2042         atomic_set(&ch->req_lim_delta, 0);
2043
2044         /* create cm reply */
2045         rep_param->qp_num = ch->qp->qp_num;
2046         rep_param->private_data = (void *)rsp;
2047         rep_param->private_data_len = sizeof *rsp;
2048         rep_param->rnr_retry_count = 7;
2049         rep_param->flow_control = 1;
2050         rep_param->failover_accepted = 0;
2051         rep_param->srq = 1;
2052         rep_param->responder_resources = 4;
2053         rep_param->initiator_depth = 4;
2054
2055         ret = ib_send_cm_rep(cm_id, rep_param);
2056         if (ret) {
2057                 PRINT_ERROR("sending SRP_LOGIN_REQ response failed"
2058                             " (error code = %d)", ret);
2059                 goto release_channel;
2060         }
2061
2062         spin_lock_irq(&sdev->spinlock);
2063         list_add_tail(&ch->list, &sdev->rch_list);
2064         spin_unlock_irq(&sdev->spinlock);
2065
2066         goto out;
2067
2068 release_channel:
2069         atomic_set(&ch->state, RDMA_CHANNEL_DISCONNECTING);
2070         scst_unregister_session(ch->scst_sess, 0, NULL);
2071         ch->scst_sess = NULL;
2072
2073 destroy_ib:
2074         ib_destroy_qp(ch->qp);
2075         ib_destroy_cq(ch->cq);
2076
2077 free_ch:
2078         kfree(ch);
2079
2080 reject:
2081         rej->opcode = SRP_LOGIN_REJ;
2082         rej->tag = req->tag;
2083         rej->buf_fmt =
2084             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
2085
2086         ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
2087                              (void *)rej, sizeof *rej);
2088
2089 out:
2090         kfree(rep_param);
2091         kfree(rsp);
2092         kfree(rej);
2093
2094         return ret;
2095 }
2096
2097 static void srpt_cm_rej_recv(struct ib_cm_id *cm_id)
2098 {
2099         PRINT_INFO("Received InfiniBand REJ packet for cm_id %p.", cm_id);
2100         srpt_release_channel_by_cmid(cm_id);
2101 }
2102
2103 /**
2104  * Process an IB_CM_RTU_RECEIVED or IB_CM_USER_ESTABLISHED event.
2105  *
2106  * An IB_CM_RTU_RECEIVED message indicates that the connection is established
2107  * and that the recipient may begin transmitting (RTU = ready to use).
2108  */
2109 static void srpt_cm_rtu_recv(struct ib_cm_id *cm_id)
2110 {
2111         struct srpt_rdma_ch *ch;
2112         int ret;
2113
2114         ch = srpt_find_channel(cm_id->context, cm_id);
2115         WARN_ON(!ch);
2116         if (!ch)
2117                 goto out;
2118
2119         if (srpt_test_and_set_channel_state(ch, RDMA_CHANNEL_CONNECTING,
2120                         RDMA_CHANNEL_LIVE) == RDMA_CHANNEL_CONNECTING) {
2121                 struct srpt_ioctx *ioctx, *ioctx_tmp;
2122
2123                 ret = srpt_ch_qp_rts(ch, ch->qp);
2124
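                /*
                 * Process the commands that srpt_handle_new_iu() queued on
                 * cmd_wait_list while the channel was still in the
                 * RDMA_CHANNEL_CONNECTING state.
                 */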
2125                 list_for_each_entry_safe(ioctx, ioctx_tmp, &ch->cmd_wait_list,
2126                                          wait_list) {
2127                         list_del(&ioctx->wait_list);
2128                         srpt_handle_new_iu(ch, ioctx);
2129                 }
2130                 if (ret && srpt_test_and_set_channel_state(ch,
2131                         RDMA_CHANNEL_LIVE,
2132                         RDMA_CHANNEL_DISCONNECTING) == RDMA_CHANNEL_LIVE) {
2133                         TRACE_DBG("cm_id=%p sess_name=%s state=%d",
2134                                   cm_id, ch->sess_name,
2135                                   atomic_read(&ch->state));
2136                         ib_send_cm_dreq(ch->cm_id, NULL, 0);
2137                 }
2138         }
2139
2140 out:
2141         ;
2142 }
2143
2144 static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
2145 {
2146         PRINT_INFO("Received InfiniBand TimeWait exit for cm_id %p.", cm_id);
2147         srpt_release_channel_by_cmid(cm_id);
2148 }
2149
2150 static void srpt_cm_rep_error(struct ib_cm_id *cm_id)
2151 {
2152         PRINT_INFO("Received InfiniBand REP error for cm_id %p.", cm_id);
2153         srpt_release_channel_by_cmid(cm_id);
2154 }
2155
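/**
 * Process an IB_CM_DREQ_RECEIVED event: acknowledge the initiator's
 * disconnect request (DREQ) with a DREP while the channel is still live or
 * connecting; a channel that is already disconnecting is left alone.
 */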
2156 static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
2157 {
2158         struct srpt_rdma_ch *ch;
2159
2160         ch = srpt_find_channel(cm_id->context, cm_id);
2161         WARN_ON(!ch);
2162         if (!ch)
2163                 goto out;
2164
2165         TRACE_DBG("cm_id= %p ch->state= %d", cm_id, atomic_read(&ch->state));
2166
2167         switch (atomic_read(&ch->state)) {
2168         case RDMA_CHANNEL_LIVE:
2169         case RDMA_CHANNEL_CONNECTING:
2170                 ib_send_cm_drep(ch->cm_id, NULL, 0);
2171                 PRINT_INFO("Received DREQ and sent DREP for session %s.",
2172                            ch->sess_name);
2173                 break;
2174         case RDMA_CHANNEL_DISCONNECTING:
2175         default:
2176                 break;
2177         }
2178
2179 out:
2180         ;
2181 }
2182
2183 static void srpt_cm_drep_recv(struct ib_cm_id *cm_id)
2184 {
2185         PRINT_INFO("Received InfiniBand DREP message for cm_id %p.", cm_id);
2186         srpt_release_channel_by_cmid(cm_id);
2187 }
2188
2189 /**
2190  * IB connection manager callback function.
2191  *
2192  * A non-zero return value will cause the caller to destroy the CM ID.
2193  *
2194  * Note: srpt_cm_handler() must only return a non-zero value when transferring
2195  * ownership of the cm_id to a channel by srpt_cm_req_recv() failed. Returning
2196  * a non-zero value in any other case will trigger a race with the
2197  * ib_destroy_cm_id() call in srpt_release_channel().
2198  */
2199 static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2200 {
2201         int ret;
2202
2203         ret = 0;
2204         switch (event->event) {
2205         case IB_CM_REQ_RECEIVED:
2206                 ret = srpt_cm_req_recv(cm_id, &event->param.req_rcvd,
2207                                        event->private_data);
2208                 break;
2209         case IB_CM_REJ_RECEIVED:
2210                 srpt_cm_rej_recv(cm_id);
2211                 break;
2212         case IB_CM_RTU_RECEIVED:
2213         case IB_CM_USER_ESTABLISHED:
2214                 srpt_cm_rtu_recv(cm_id);
2215                 break;
2216         case IB_CM_DREQ_RECEIVED:
2217                 srpt_cm_dreq_recv(cm_id);
2218                 break;
2219         case IB_CM_DREP_RECEIVED:
2220                 srpt_cm_drep_recv(cm_id);
2221                 break;
2222         case IB_CM_TIMEWAIT_EXIT:
2223                 srpt_cm_timewait_exit(cm_id);
2224                 break;
2225         case IB_CM_REP_ERROR:
2226                 srpt_cm_rep_error(cm_id);
2227                 break;
2228         default:
2229                 break;
2230         }
2231
2232         return ret;
2233 }
2234
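/**
 * Map the scatter/gather list of the SCST command 'scmnd' onto the ib_sge
 * arrays of the RDMA work requests (rdma_iu structures) of 'ioctx'. The
 * mapping is built in two passes: the first pass counts the ib_sge entries
 * needed per remote descriptor and allocates the rdma_iu and sge arrays, and
 * the second pass fills in the DMA addresses and lengths. Returns zero upon
 * success and a negative error code upon failure.
 */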
2235 static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
2236                                  struct srpt_ioctx *ioctx,
2237                                  struct scst_cmd *scmnd)
2238 {
2239         struct scatterlist *scat;
2240         scst_data_direction dir;
2241         struct rdma_iu *riu;
2242         struct srp_direct_buf *db;
2243         dma_addr_t dma_addr;
2244         struct ib_sge *sge;
2245         u64 raddr;
2246         u32 rsize;
2247         u32 tsize;
2248         u32 dma_len;
2249         int count, nrdma;
2250         int i, j, k;
2251
2252         scat = scst_cmd_get_sg(scmnd);
2253         dir = scst_cmd_get_data_direction(scmnd);
2254         WARN_ON(scat == NULL);
2255         count = ib_dma_map_sg(ch->sport->sdev->device, scat,
2256                               scst_cmd_get_sg_cnt(scmnd),
2257                               scst_to_tgt_dma_dir(dir));
2258         if (unlikely(!count))
2259                 return -EBUSY;
2260
2261         if (ioctx->rdma_ius && ioctx->n_rdma_ius)
2262                 nrdma = ioctx->n_rdma_ius;
2263         else {
2264                 nrdma = count / SRPT_DEF_SG_PER_WQE + ioctx->n_rbuf;
2265
2266                 ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu,
2267                                           scst_cmd_atomic(scmnd)
2268                                           ? GFP_ATOMIC : GFP_KERNEL);
2269                 if (!ioctx->rdma_ius) {
2270                         WARN_ON(scat == NULL);
2271                         ib_dma_unmap_sg(ch->sport->sdev->device,
2272                                         scat, scst_cmd_get_sg_cnt(scmnd),
2273                                         scst_to_tgt_dma_dir(dir));
2274                         return -ENOMEM;
2275                 }
2276
2277                 ioctx->n_rdma_ius = nrdma;
2278         }
2279
2280         db = ioctx->rbufs;
2281         tsize = (dir == SCST_DATA_READ) ?
2282                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
2283         dma_len = sg_dma_len(&scat[0]);
2284         riu = ioctx->rdma_ius;
2285
2286         /*
2287          * For each remote descriptor, calculate the number of ib_sge
2288          * entries needed. If at most SRPT_DEF_SG_PER_WQE ib_sge entries
2289          * are needed per RDMA operation, a single rdma_iu (one RDMA work
2290          * request) per remote descriptor suffices; otherwise extra
2291          * rdma_iu structures are allocated to carry the remaining ib_sge
2292          * entries in additional RDMA work requests.
2293          */
2294         for (i = 0, j = 0;
2295              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
2296                 rsize = be32_to_cpu(db->len);
2297                 raddr = be64_to_cpu(db->va);
2298                 riu->raddr = raddr;
2299                 riu->rkey = be32_to_cpu(db->key);
2300                 riu->sge_cnt = 0;
2301
2302                 /* Calculate how many ib_sge entries this remote buffer requires. */
2303                 while (rsize > 0 && tsize > 0) {
2304
2305                         if (rsize >= dma_len) {
2306                                 tsize -= dma_len;
2307                                 rsize -= dma_len;
2308                                 raddr += dma_len;
2309
2310                                 if (tsize > 0) {
2311                                         ++j;
2312                                         if (j < count)
2313                                                 dma_len = sg_dma_len(&scat[j]);
2314                                 }
2315                         } else {
2316                                 tsize -= rsize;
2317                                 dma_len -= rsize;
2318                                 rsize = 0;
2319                         }
2320
2321                         ++riu->sge_cnt;
2322
2323                         if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) {
2324                                 ++ioctx->n_rdma;
2325                                 riu->sge =
2326                                     kmalloc(riu->sge_cnt * sizeof *riu->sge,
2327                                             scst_cmd_atomic(scmnd)
2328                                             ? GFP_ATOMIC : GFP_KERNEL);
2329                                 if (!riu->sge)
2330                                         goto free_mem;
2331
2332                                 ++riu;
2333                                 riu->sge_cnt = 0;
2334                                 riu->raddr = raddr;
2335                                 riu->rkey = be32_to_cpu(db->key);
2336                         }
2337                 }
2338
2339                 ++ioctx->n_rdma;
2340                 riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge,
2341                                    scst_cmd_atomic(scmnd)
2342                                    ? GFP_ATOMIC : GFP_KERNEL);
2343                 if (!riu->sge)
2344                         goto free_mem;
2345         }
2346
2347         db = ioctx->rbufs;
2348         scat = scst_cmd_get_sg(scmnd);
2349         tsize = (dir == SCST_DATA_READ) ?
2350                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
2351         riu = ioctx->rdma_ius;
2352         dma_len = sg_dma_len(&scat[0]);
2353         dma_addr = sg_dma_address(&scat[0]);
2354
2355         /* Second pass: map the s/g DMA addresses onto the rdma_iu->sge arrays. */
2356         for (i = 0, j = 0;
2357              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
2358                 rsize = be32_to_cpu(db->len);
2359                 sge = riu->sge;
2360                 k = 0;
2361
2362                 while (rsize > 0 && tsize > 0) {
2363                         sge->addr = dma_addr;
2364                         sge->lkey = ch->sport->sdev->mr->lkey;
2365
2366                         if (rsize >= dma_len) {
2367                                 sge->length =
2368                                         (tsize < dma_len) ? tsize : dma_len;
2369                                 tsize -= dma_len;
2370                                 rsize -= dma_len;
2371
2372                                 if (tsize > 0) {
2373                                         ++j;
2374                                         if (j < count) {
2375                                                 dma_len = sg_dma_len(&scat[j]);
2376                                                 dma_addr =
2377                                                     sg_dma_address(&scat[j]);
2378                                         }
2379                                 }
2380                         } else {
2381                                 sge->length = (tsize < rsize) ? tsize : rsize;
2382                                 tsize -= rsize;
2383                                 dma_len -= rsize;
2384                                 dma_addr += rsize;
2385                                 rsize = 0;
2386                         }
2387
2388                         ++k;
2389                         if (k == riu->sge_cnt && rsize > 0) {
2390                                 ++riu;
2391                                 sge = riu->sge;
2392                                 k = 0;
2393                         } else if (rsize > 0)
2394                                 ++sge;
2395                 }
2396         }
2397
2398         return 0;
2399
2400 free_mem:
2401         srpt_unmap_sg_to_ib_sge(ch, ioctx);
2402
2403         return -ENOMEM;
2404 }
2405
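/**
 * Undo srpt_map_sg_to_ib_sge(): free the ib_sge arrays and the rdma_iu
 * structures of 'ioctx' and unmap the scatter/gather list of the associated
 * SCST command, if any.
 */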
2406 static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
2407                                     struct srpt_ioctx *ioctx)
2408 {
2409         struct scst_cmd *scmnd;
2410         struct scatterlist *scat;
2411         scst_data_direction dir;
2412
2413         BUG_ON(ioctx->n_rdma && !ioctx->rdma_ius);
2414
2415         while (ioctx->n_rdma)
2416                 kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge);
2417
2418         kfree(ioctx->rdma_ius);
2419         ioctx->rdma_ius = NULL;
2420
2421         scmnd = ioctx->scmnd;
2422         if (scmnd) {
2423                 BUG_ON(ioctx != scst_cmd_get_tgt_priv(scmnd));
2424                 scat = scst_cmd_get_sg(scmnd);
2425                 if (scat) {
2426                         dir = scst_cmd_get_data_direction(scmnd);
2427                         ib_dma_unmap_sg(ch->sport->sdev->device,
2428                                         scat, scst_cmd_get_sg_cnt(scmnd),
2429                                         scst_to_tgt_dma_dir(dir));
2430                 }
2431         }
2432 }
2433
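/**
 * Post the RDMA work requests needed to transfer the data of 'ioctx'. A read
 * command is executed via RDMA writes towards the initiator and a write
 * command via RDMA reads from the initiator. For write commands the required
 * number of send queue work requests is reserved from ch->qp_wr_avail first,
 * and only the last of these work requests is signaled in order to limit the
 * number of completion events.
 */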
2434 static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
2435                               scst_data_direction dir)
2436 {
2437         struct ib_send_wr wr;
2438         struct ib_send_wr *bad_wr;
2439         struct rdma_iu *riu;
2440         int i;
2441         int ret;
2442         int srq_wr_avail;
2443
2444         if (dir == SCST_DATA_WRITE) {
2445                 ret = -ENOMEM;
2446                 srq_wr_avail = atomic_sub_return(ioctx->n_rdma,
2447                                                  &ch->qp_wr_avail);
2448                 if (srq_wr_avail < 0) {
2449                         atomic_add(ioctx->n_rdma, &ch->qp_wr_avail);
2450                         PRINT_INFO("%s[%d]: IB send queue full", __func__, __LINE__);
2451                         goto out;
2452                 }
2453         }
2454
2455         ret = 0;
2456         riu = ioctx->rdma_ius;
2457         memset(&wr, 0, sizeof wr);
2458
2459         for (i = 0; i < ioctx->n_rdma; ++i, ++riu) {
2460                 wr.opcode = (dir == SCST_DATA_READ) ?
2461                     IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
2462                 wr.next = NULL;
2463                 wr.wr_id = ioctx->index;
2464                 wr.wr.rdma.remote_addr = riu->raddr;
2465                 wr.wr.rdma.rkey = riu->rkey;
2466                 wr.num_sge = riu->sge_cnt;
2467                 wr.sg_list = riu->sge;
2468
2469                 /* only get completion event for the last rdma wr */
2470                 if (i == (ioctx->n_rdma - 1) && dir == SCST_DATA_WRITE)
2471                         wr.send_flags = IB_SEND_SIGNALED;
2472
2473                 ret = ib_post_send(ch->qp, &wr, &bad_wr);
2474                 if (ret)
2475                         goto out;
2476         }
2477
2478 out:
2479         return ret;
2480 }
2481
2482 /*
2483  * Start data transfer between initiator and target. Must not block.
2484  */
2485 static int srpt_xfer_data(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
2486                           struct scst_cmd *scmnd)
2487 {
2488         int ret;
2489
2490         ret = srpt_map_sg_to_ib_sge(ch, ioctx, scmnd);
2491         if (ret) {
2492                 PRINT_ERROR("%s[%d] ret=%d", __func__, __LINE__, ret);
2493                 ret = SCST_TGT_RES_QUEUE_FULL;
2494                 goto out;
2495         }
2496
2497         ret = srpt_perform_rdmas(ch, ioctx, scst_cmd_get_data_direction(scmnd));
2498         if (ret) {
2499                 if (ret == -EAGAIN || ret == -ENOMEM) {
2500                         PRINT_INFO("%s[%d] queue full -- ret=%d",
2501                                    __func__, __LINE__, ret);
2502                         ret = SCST_TGT_RES_QUEUE_FULL;
2503                 } else {
2504                         PRINT_ERROR("%s[%d] fatal error -- ret=%d",
2505                                     __func__, __LINE__, ret);
2506                         ret = SCST_TGT_RES_FATAL_ERROR;
2507                 }
2508                 goto out_unmap;
2509         }
2510
2511         ret = SCST_TGT_RES_SUCCESS;
2512
2513 out:
2514         return ret;
2515 out_unmap:
2516         srpt_unmap_sg_to_ib_sge(ch, ioctx);
2517         goto out;
2518 }
2519
2520 /*
2521  * Called by the SCST core to inform ib_srpt that data reception from the
2522  * initiator should start (SCST_DATA_WRITE). Must not block.
2523  */
2524 static int srpt_rdy_to_xfer(struct scst_cmd *scmnd)
2525 {
2526         struct srpt_rdma_ch *ch;
2527         struct srpt_ioctx *ioctx;
2528         enum rdma_ch_state ch_state;
2529         int ret;
2530
2531         ioctx = scst_cmd_get_tgt_priv(scmnd);
2532         BUG_ON(!ioctx);
2533
2534         WARN_ON(srpt_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA)
2535                 == SRPT_STATE_ABORTED);
2536
2537         ch = ioctx->ch;
2538         WARN_ON(ch != scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd)));
2539         BUG_ON(!ch);
2540
2541         ch_state = atomic_read(&ch->state);
2542         if (ch_state == RDMA_CHANNEL_DISCONNECTING) {
2543                 TRACE_DBG("cmd with tag %lld: channel disconnecting",
2544                           scst_cmd_get_tag(scmnd));
2545                 ret = SCST_TGT_RES_FATAL_ERROR;
2546                 goto out;
2547         } else if (ch_state == RDMA_CHANNEL_CONNECTING) {
2548                 ret = SCST_TGT_RES_QUEUE_FULL;
2549                 goto out;
2550         }
2551         ret = srpt_xfer_data(ch, ioctx, scmnd);
2552
2553 out:
2554         return ret;
2555 }
2556
2557 /**
2558  * srpt_xmit_response() - SCST callback function that transmits the response
2559  * to a SCSI command.
2560  *
2561  * Must not block.
2562  */
2563 static int srpt_xmit_response(struct scst_cmd *scmnd)
2564 {
2565         struct srpt_rdma_ch *ch;
2566         struct srpt_ioctx *ioctx;
2567         int ret = SCST_TGT_RES_SUCCESS;
2568         int dir;
2569         int resp_len;
2570
2571         ioctx = scst_cmd_get_tgt_priv(scmnd);
2572         BUG_ON(!ioctx);
2573
2574         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2575         BUG_ON(!ch);
2576
2577         if (unlikely(scst_cmd_aborted(scmnd))) {
2578                 TRACE_DBG("cmd with tag %lld has been aborted",
2579                           scst_cmd_get_tag(scmnd));
2580                 srpt_abort_scst_cmd(ch->sport->sdev, scmnd);
2581                 ret = SCST_TGT_RES_SUCCESS;
2582                 goto out;
2583         }
2584
2585         if (srpt_set_cmd_state(ioctx, SRPT_STATE_PROCESSED)
2586             == SRPT_STATE_ABORTED) {
2587                 ret = SCST_TGT_RES_SUCCESS;
2588                 goto out;
2589         }
2590
2591         dir = scst_cmd_get_data_direction(scmnd);
2592
2593         /* For read commands, transfer the data to the initiator. */
2594         if (dir == SCST_DATA_READ && scst_cmd_get_resp_data_len(scmnd)) {
2595                 ret = srpt_xfer_data(ch, ioctx, scmnd);
2596                 if (ret != SCST_TGT_RES_SUCCESS) {
2597                         PRINT_ERROR("%s: tag= %lld xfer_data failed",
2598                                     __func__,
2599                                     (unsigned long long)
2600                                     scst_cmd_get_tag(scmnd));
2601                         goto out;
2602                 }
2603         }
2604
2605         resp_len = srpt_build_cmd_rsp(ch, ioctx,
2606                                       scst_cmd_get_tag(scmnd),
2607                                       scst_cmd_get_status(scmnd),
2608                                       scst_cmd_get_sense_buffer(scmnd),
2609                                       scst_cmd_get_sense_buffer_len(scmnd));
2610
2611         if (srpt_post_send(ch, ioctx, resp_len)) {
2612                 PRINT_ERROR("%s[%d]: ch->state= %d tag= %lld",
2613                             __func__, __LINE__, atomic_read(&ch->state),
2614                             (unsigned long long)scst_cmd_get_tag(scmnd));
2615                 ret = SCST_TGT_RES_FATAL_ERROR;
2616         }
2617
2618 out:
2619         return ret;
2620 }
2621
2622 /**
2623  * srpt_tsk_mgmt_done() - SCST callback function that sends back the response
2624  * for a task management request.
2625  *
2626  * Must not block.
2627  */
2628 static void srpt_tsk_mgmt_done(struct scst_mgmt_cmd *mcmnd)
2629 {
2630         struct srpt_rdma_ch *ch;
2631         struct srpt_mgmt_ioctx *mgmt_ioctx;
2632         struct srpt_ioctx *ioctx;
2633         int rsp_len;
2634
2635         mgmt_ioctx = scst_mgmt_cmd_get_tgt_priv(mcmnd);
2636         BUG_ON(!mgmt_ioctx);
2637
2638         ch = mgmt_ioctx->ch;
2639         BUG_ON(!ch);
2640
2641         ioctx = mgmt_ioctx->ioctx;
2642         BUG_ON(!ioctx);
2643
2644         TRACE_DBG("%s: tsk_mgmt_done for tag= %lld status=%d",
2645                   __func__, (unsigned long long)mgmt_ioctx->tag,
2646                   scst_mgmt_cmd_get_status(mcmnd));
2647
2648         if (srpt_set_cmd_state(ioctx, SRPT_STATE_PROCESSED)
2649             == SRPT_STATE_ABORTED)
2650                 goto out;
2651
2652         rsp_len = srpt_build_tskmgmt_rsp(ch, ioctx,
2653                                          (scst_mgmt_cmd_get_status(mcmnd) ==
2654                                           SCST_MGMT_STATUS_SUCCESS) ?
2655                                          SRP_TSK_MGMT_SUCCESS :
2656                                          SRP_TSK_MGMT_FAILED,
2657                                          mgmt_ioctx->tag);
2658         srpt_post_send(ch, ioctx, rsp_len);
2659
2660         scst_mgmt_cmd_set_tgt_priv(mcmnd, NULL);
2661
2662         kfree(mgmt_ioctx);
2663
2664 out:
2665         ;
2666 }
2667
2668 /*
2669  * Called by the SCST core to inform ib_srpt that the command 'scmnd' is about
2670  * to be freed. May be called in IRQ context.
2671  */
2672 static void srpt_on_free_cmd(struct scst_cmd *scmnd)
2673 {
2674         struct srpt_rdma_ch *ch;
2675         struct srpt_ioctx *ioctx;
2676
2677         ioctx = scst_cmd_get_tgt_priv(scmnd);
2678         BUG_ON(!ioctx);
2679
2680         ch = ioctx->ch;
2681         BUG_ON(!ch);
2682
2683         scst_cmd_set_tgt_priv(scmnd, NULL);
2684         srpt_set_cmd_state(ioctx, SRPT_STATE_ABORTED);
2685         ioctx->scmnd = NULL;
2686         ioctx->ch = NULL;
2687         srpt_reset_ioctx(ch, ioctx);
2688 }
2689
2690 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2691 /* A vanilla 2.6.19 or older kernel without backported OFED kernel headers. */
2692 static void srpt_refresh_port_work(void *ctx)
2693 #else
2694 static void srpt_refresh_port_work(struct work_struct *work)
2695 #endif
2696 {
2697 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2698         struct srpt_port *sport = (struct srpt_port *)ctx;
2699 #else
2700         struct srpt_port *sport = container_of(work, struct srpt_port, work);
2701 #endif
2702
2703         srpt_refresh_port(sport);
2704 }
2705
2706 /*
2707  * Called by the SCST core to detect target adapters. Returns the number of
2708  * detected target adapters.
2709  */
2710 static int srpt_detect(struct scst_tgt_template *tp)
2711 {
2712         int device_count;
2713
2714         TRACE_ENTRY();
2715
2716         device_count = atomic_read(&srpt_device_count);
2717
2718         TRACE_EXIT_RES(device_count);
2719
2720         return device_count;
2721 }
2722
2723 /*
2724  * Callback function called by the SCST core from scst_unregister() to free up
2725  * the resources associated with device scst_tgt.
2726  */
2727 static int srpt_release(struct scst_tgt *scst_tgt)
2728 {
2729         struct srpt_device *sdev = scst_tgt_get_tgt_priv(scst_tgt);
2730         struct srpt_rdma_ch *ch, *tmp_ch;
2731
2732         TRACE_ENTRY();
2733
2734         BUG_ON(!scst_tgt);
2735 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
2736         WARN_ON(!sdev);
2737         if (!sdev)
2738                 return -ENODEV;
2739 #else
2740         if (WARN_ON(!sdev))
2741                 return -ENODEV;
2742 #endif
2743
2744 #ifdef CONFIG_SCST_PROC
2745         srpt_unregister_procfs_entry(scst_tgt->tgtt);
2746 #endif /*CONFIG_SCST_PROC*/
2747
2748         spin_lock_irq(&sdev->spinlock);
2749         list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
2750                 list_del(&ch->list);
2751                 atomic_set(&ch->state, RDMA_CHANNEL_DISCONNECTING);
2752                 spin_unlock_irq(&sdev->spinlock);
2753                 ib_send_cm_dreq(ch->cm_id, NULL, 0);
2754                 scst_unregister_session(ch->scst_sess, true,
2755                                         srpt_release_channel);
2756                 spin_lock_irq(&sdev->spinlock);
2757         }
2758         spin_unlock_irq(&sdev->spinlock);
2759
2760         srpt_unregister_mad_agent(sdev);
2761
2762         scst_tgt_set_tgt_priv(scst_tgt, NULL);
2763
2764         TRACE_EXIT();
2765
2766         return 0;
2767 }
2768
2769 /*
2770  * Entry point for ib_srpt's kernel thread. This kernel thread is only created
2771  * when the module parameter 'thread' is not zero (the default is zero).
2772  * This thread processes the ioctx list srpt_thread.thread_ioctx_list.
2773  *
2774  * @pre thread != 0
2775  */
2776 static int srpt_ioctx_thread(void *arg)
2777 {
2778         struct srpt_ioctx *ioctx;
2779
2780         /* Hibernation / freezing of the SRPT kernel thread is not supported. */
2781         current->flags |= PF_NOFREEZE;
2782
2783         spin_lock_irq(&srpt_thread.thread_lock);
2784         while (!kthread_should_stop()) {
2785                 wait_queue_t wait;
2786                 init_waitqueue_entry(&wait, current);
2787
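                /*
                 * Open-coded wait_event(): the wait condition has to be
                 * checked with srpt_thread.thread_lock held, and that
                 * spinlock must be released around each call to schedule().
                 */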
2788                 if (!srpt_test_ioctx_list()) {
2789                         add_wait_queue_exclusive(&ioctx_list_waitQ, &wait);
2790
2791                         for (;;) {
2792                                 set_current_state(TASK_INTERRUPTIBLE);
2793                                 if (srpt_test_ioctx_list())
2794                                         break;
2795                                 spin_unlock_irq(&srpt_thread.thread_lock);
2796                                 schedule();
2797                                 spin_lock_irq(&srpt_thread.thread_lock);
2798                         }
2799                         set_current_state(TASK_RUNNING);
2800                         remove_wait_queue(&ioctx_list_waitQ, &wait);
2801                 }
2802
2803                 while (!list_empty(&srpt_thread.thread_ioctx_list)) {
2804                         ioctx = list_entry(srpt_thread.thread_ioctx_list.next,
2805                                            struct srpt_ioctx, comp_list);
2806
2807                         list_del(&ioctx->comp_list);
2808
2809                         spin_unlock_irq(&srpt_thread.thread_lock);
2810                         switch (ioctx->op) {
2811                         case IB_WC_SEND:
2812                                 srpt_handle_send_comp(ioctx->ch, ioctx,
2813                                         SCST_CONTEXT_DIRECT);
2814                                 break;
2815                         case IB_WC_RDMA_WRITE:
2816                         case IB_WC_RDMA_READ:
2817                                 srpt_handle_rdma_comp(ioctx->ch, ioctx);
2818                                 break;
2819                         case IB_WC_RECV:
2820                                 srpt_handle_new_iu(ioctx->ch, ioctx);
2821                                 break;
2822                         default:
2823                                 break;
2824                         }
2825 #if defined(CONFIG_SCST_DEBUG)
2826                         if (thread_processing_delay_in_us
2827                             <= MAX_UDELAY_MS * 1000)
2828                                 udelay(thread_processing_delay_in_us);
2829 #endif
2830                         spin_lock_irq(&srpt_thread.thread_lock);
2831                 }
2832         }
2833         spin_unlock_irq(&srpt_thread.thread_lock);
2834
2835         return 0;
2836 }
2837
2838 /* SCST target template for the SRP target implementation. */
2839 static struct scst_tgt_template srpt_template = {
2840         .name = DRV_NAME,
2841         .sg_tablesize = SRPT_DEF_SG_TABLESIZE,
2842         .xmit_response_atomic = 1,
2843         .rdy_to_xfer_atomic = 1,
2844         .detect = srpt_detect,
2845         .release = srpt_release,
2846         .xmit_response = srpt_xmit_response,
2847         .rdy_to_xfer = srpt_rdy_to_xfer,
2848         .on_free_cmd = srpt_on_free_cmd,
2849         .task_mgmt_fn_done = srpt_tsk_mgmt_done
2850 };
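
/*
 * Note: setting xmit_response_atomic and rdy_to_xfer_atomic tells the SCST
 * core that srpt_xmit_response() and srpt_rdy_to_xfer() may be invoked in
 * atomic context; this is why the s/g mapping code above switches to
 * GFP_ATOMIC allocations whenever scst_cmd_atomic() reports an atomic caller.
 */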
2851
2852 /*
2853  * The callback function srpt_release_class_dev() is called whenever a
2854  * device is removed from the /sys/class/infiniband_srpt device class.
2855  * Although this function has been left empty, a release function has been
2856  * defined such that upon module removal no complaint is logged about a
2857  * missing release function.
2858  */
2859 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2860 static void srpt_release_class_dev(struct class_device *class_dev)
2861 #else
2862 static void srpt_release_class_dev(struct device *dev)
2863 #endif
2864 {
2865 }

#ifdef CONFIG_SCST_PROC

#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
static int srpt_trace_level_show(struct seq_file *seq, void *v)
{
	return scst_proc_log_entry_read(seq, trace_flag, NULL);
}

static ssize_t srpt_proc_trace_level_write(struct file *file,
	const char __user *buf, size_t length, loff_t *off)
{
	return scst_proc_log_entry_write(file, buf, length, &trace_flag,
		DEFAULT_SRPT_TRACE_FLAGS, NULL);
}

static struct scst_proc_data srpt_log_proc_data = {
	SCST_DEF_RW_SEQ_OP(srpt_proc_trace_level_write)
	.show = srpt_trace_level_show,
};
#endif
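
/*
 * The trace_level entry created from srpt_log_proc_data can be read and
 * written from user space. A usage sketch; the exact token syntax is
 * defined by scst_proc_log_entry_write() in the SCST core, so the "add"
 * command and the "mgmt" flag name below are assumptions:
 *
 *	cat /proc/scsi_tgt/ib_srpt/trace_level
 *	echo "add mgmt" > /proc/scsi_tgt/ib_srpt/trace_level
 */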

#endif /* CONFIG_SCST_PROC */

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
static ssize_t show_login_info(struct class_device *class_dev, char *buf)
#else
static ssize_t show_login_info(struct device *dev,
			       struct device_attribute *attr, char *buf)
#endif
{
	struct srpt_device *sdev =
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
		container_of(class_dev, struct srpt_device, class_dev);
#else
		container_of(dev, struct srpt_device, dev);
#endif
	struct srpt_port *sport;
	int i;
	int len = 0;

	for (i = 0; i < sdev->device->phys_port_cnt; i++) {
		sport = &sdev->port[i];

		len += sprintf(buf + len,
			       "tid_ext=%016llx,ioc_guid=%016llx,pkey=ffff,"
			       "dgid=%04x%04x%04x%04x%04x%04x%04x%04x,"
			       "service_id=%016llx\n",
			       (unsigned long long) srpt_service_guid,
			       (unsigned long long) srpt_service_guid,
			       be16_to_cpu(((__be16 *) sport->gid.raw)[0]),
			       be16_to_cpu(((__be16 *) sport->gid.raw)[1]),
			       be16_to_cpu(((__be16 *) sport->gid.raw)[2]),
			       be16_to_cpu(((__be16 *) sport->gid.raw)[3]),
			       be16_to_cpu(((__be16 *) sport->gid.raw)[4]),
			       be16_to_cpu(((__be16 *) sport->gid.raw)[5]),
			       be16_to_cpu(((__be16 *) sport->gid.raw)[6]),
			       be16_to_cpu(((__be16 *) sport->gid.raw)[7]),
			       (unsigned long long) srpt_service_guid);
	}

	return len;
}
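
/*
 * show_login_info() emits one line per port, in a format close to what the
 * SRP initiator's add_target interface expects. A usage sketch; the HCA
 * name "mlx4_0" is an illustrative assumption:
 *
 *	cat /sys/class/infiniband_srpt/srpt-mlx4_0/login_info
 */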

static struct class_attribute srpt_class_attrs[] = {
	__ATTR_NULL,
};

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
static struct class_device_attribute srpt_dev_attrs[] = {
#else
static struct device_attribute srpt_dev_attrs[] = {
#endif
	__ATTR(login_info, S_IRUGO, show_login_info, NULL),
	__ATTR_NULL,
};

static struct class srpt_class = {
	.name        = "infiniband_srpt",
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
	.release = srpt_release_class_dev,
#else
	.dev_release = srpt_release_class_dev,
#endif
	.class_attrs = srpt_class_attrs,
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
	.class_dev_attrs = srpt_dev_attrs,
#else
	.dev_attrs   = srpt_dev_attrs,
#endif
};

/*
 * Callback function, called by the InfiniBand core either when a new
 * InfiniBand device has been added or, from ib_register_client(), once for
 * each InfiniBand device that is already registered.
 */
static void srpt_add_one(struct ib_device *device)
{
	struct srpt_device *sdev;
	struct srpt_port *sport;
	struct ib_srq_init_attr srq_attr;
	int i;

	TRACE_ENTRY();

	TRACE_DBG("device = %p, device->dma_ops = %p", device, device->dma_ops);

	sdev = kzalloc(sizeof *sdev, GFP_KERNEL);
	if (!sdev)
		return;

	sdev->device = device;

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
	sdev->class_dev.class = &srpt_class;
	sdev->class_dev.dev = device->dma_device;
	snprintf(sdev->class_dev.class_id, BUS_ID_SIZE,
		 "srpt-%s", device->name);
#else
	sdev->dev.class = &srpt_class;
	sdev->dev.parent = device->dma_device;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30)
	snprintf(sdev->dev.bus_id, BUS_ID_SIZE, "srpt-%s", device->name);
#else
	dev_set_name(&sdev->dev, "srpt-%s", device->name);
#endif
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
	if (class_device_register(&sdev->class_dev))
		goto free_dev;
#else
	if (device_register(&sdev->dev))
		goto free_dev;
#endif

	if (ib_query_device(device, &sdev->dev_attr))
		goto err_dev;

	sdev->pd = ib_alloc_pd(device);
	if (IS_ERR(sdev->pd))
		goto err_dev;

	sdev->mr = ib_get_dma_mr(sdev->pd, IB_ACCESS_LOCAL_WRITE);
	if (IS_ERR(sdev->mr))
		goto err_pd;

	srq_attr.event_handler = srpt_srq_event;
	srq_attr.srq_context = sdev;
	srq_attr.attr.max_wr = min(SRPT_SRQ_SIZE, sdev->dev_attr.max_srq_wr);
	srq_attr.attr.max_sge = 1;
	srq_attr.attr.srq_limit = 0;

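	/*
	 * One shared receive queue (SRQ) is created per HCA: all RDMA
	 * channels of this HCA post their receive work requests on this
	 * single queue, so at most SRPT_SRQ_SIZE receive buffers are needed
	 * per device instead of per connection.
	 */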
	sdev->srq = ib_create_srq(sdev->pd, &srq_attr);
	if (IS_ERR(sdev->srq))
		goto err_mr;

	TRACE_DBG("%s: SRQ created with max_wr = %d (device limit %d)"
		  " for device %s", __func__, srq_attr.attr.max_wr,
		  sdev->dev_attr.max_srq_wr, device->name);

	if (!srpt_service_guid)
		srpt_service_guid = be64_to_cpu(device->node_guid);

	sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev);
	if (IS_ERR(sdev->cm_id))
		goto err_srq;

	/* Print out the target login information. */
	TRACE_DBG("Target login info: id_ext=%016llx,"
		  "ioc_guid=%016llx,pkey=ffff,service_id=%016llx",
		  (unsigned long long) srpt_service_guid,
		  (unsigned long long) srpt_service_guid,
		  (unsigned long long) srpt_service_guid);

	/*
	 * We do not have a consistent service_id (i.e. also id_ext of
	 * target_id) to identify this target. We currently use the GUID of
	 * the first HCA in the system as service_id; therefore, the
	 * target_id will change if this HCA goes bad and is replaced by a
	 * different HCA.
	 */
	if (ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid), 0, NULL))
		goto err_cm;

	INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device,
			      srpt_event_handler);
	if (ib_register_event_handler(&sdev->event_handler))
		goto err_cm;

	if (srpt_alloc_ioctx_ring(sdev))
		goto err_event;

	INIT_LIST_HEAD(&sdev->rch_list);
	spin_lock_init(&sdev->spinlock);

	for (i = 0; i < SRPT_SRQ_SIZE; ++i)
		srpt_post_recv(sdev, sdev->ioctx_ring[i]);

	ib_set_client_data(device, &srpt_client, sdev);

	sdev->scst_tgt = scst_register(&srpt_template, NULL);
	if (!sdev->scst_tgt) {
		PRINT_ERROR("SCST registration failed for %s.",
			    sdev->device->name);
		goto err_ring;
	}

	scst_tgt_set_tgt_priv(sdev->scst_tgt, sdev);

	WARN_ON(sdev->device->phys_port_cnt > ARRAY_SIZE(sdev->port));

	for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
		sport = &sdev->port[i - 1];
		sport->sdev = sdev;
		sport->port = i;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && !defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
		/*
		 * A vanilla 2.6.19 or older kernel without backported OFED
		 * kernel headers.
		 */
		INIT_WORK(&sport->work, srpt_refresh_port_work, sport);
#else
		INIT_WORK(&sport->work, srpt_refresh_port_work);
#endif
		if (srpt_refresh_port(sport)) {
			PRINT_ERROR("MAD registration failed for %s-%d.",
				    sdev->device->name, i);
			goto err_refresh_port;
		}
	}

	atomic_inc(&srpt_device_count);

	TRACE_EXIT();

	return;

err_refresh_port:
	scst_unregister(sdev->scst_tgt);
err_ring:
	ib_set_client_data(device, &srpt_client, NULL);
	srpt_free_ioctx_ring(sdev);
err_event:
	ib_unregister_event_handler(&sdev->event_handler);
err_cm:
	ib_destroy_cm_id(sdev->cm_id);
err_srq:
	ib_destroy_srq(sdev->srq);
err_mr:
	ib_dereg_mr(sdev->mr);
err_pd:
	ib_dealloc_pd(sdev->pd);
err_dev:
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
	class_device_unregister(&sdev->class_dev);
#else
	device_unregister(&sdev->dev);
#endif
free_dev:
	kfree(sdev);

	TRACE_EXIT();
}
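
/*
 * How srpt_add_one() and srpt_remove_one() get invoked: a minimal sketch
 * of the ib_client structure registered via ib_register_client() in
 * srpt_init_module(). The sketch assumes the definition used elsewhere in
 * this file; only .name, .add and .remove need to be set:
 *
 *	static struct ib_client srpt_client = {
 *		.name = DRV_NAME,
 *		.add = srpt_add_one,
 *		.remove = srpt_remove_one
 *	};
 */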

/*
 * Callback function, called by the InfiniBand core either when an
 * InfiniBand device has been removed or, from ib_unregister_client(), once
 * for each InfiniBand device that is still registered.
 */
static void srpt_remove_one(struct ib_device *device)
{
	int i;
	struct srpt_device *sdev;

	TRACE_ENTRY();

	sdev = ib_get_client_data(device, &srpt_client);
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
	WARN_ON(!sdev);
	if (!sdev)
		return;
#else
	if (WARN_ON(!sdev))
		return;
#endif

	/*
	 * Cancel the work if it is queued. Wait until srpt_refresh_port_work()
	 * has finished if it is running.
	 */
	for (i = 0; i < sdev->device->phys_port_cnt; i++)
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 22)
		cancel_work_sync(&sdev->port[i].work);
#else
		/*
		 * cancel_work_sync() was introduced in kernel 2.6.22. Older
		 * kernels do not have a facility to cancel scheduled work.
		 */
		PRINT_ERROR("%s",
		       "your kernel does not provide cancel_work_sync().");
#endif

	scst_unregister(sdev->scst_tgt);
	sdev->scst_tgt = NULL;

	ib_unregister_event_handler(&sdev->event_handler);
	ib_destroy_cm_id(sdev->cm_id);
	ib_destroy_srq(sdev->srq);
	ib_dereg_mr(sdev->mr);
	ib_dealloc_pd(sdev->pd);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
	class_device_unregister(&sdev->class_dev);
#else
	device_unregister(&sdev->dev);
#endif

	srpt_free_ioctx_ring(sdev);
	kfree(sdev);

	TRACE_EXIT();
}

#ifdef CONFIG_SCST_PROC

/*
 * Create procfs entries for srpt. Currently the only procfs entry created
 * by this function is the "trace_level" entry.
 */
static int srpt_register_procfs_entry(struct scst_tgt_template *tgt)
{
	int res = 0;
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
	struct proc_dir_entry *p, *root;

	root = scst_proc_get_tgt_root(tgt);
	WARN_ON(!root);
	if (root) {
		/*
		 * Fill in the scst_proc_data::data pointer, which is used in
		 * a printk(KERN_INFO ...) statement in
		 * scst_proc_log_entry_write() in scst_proc.c.
		 */
		srpt_log_proc_data.data = (char *)tgt->name;
		p = scst_create_proc_entry(root, SRPT_PROC_TRACE_LEVEL_NAME,
					   &srpt_log_proc_data);
		if (!p)
			res = -ENOMEM;
	} else {
		res = -ENOMEM;
	}
#endif
	return res;
}

static void srpt_unregister_procfs_entry(struct scst_tgt_template *tgt)
{
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
	struct proc_dir_entry *root;

	root = scst_proc_get_tgt_root(tgt);
	WARN_ON(!root);
	if (root)
		remove_proc_entry(SRPT_PROC_TRACE_LEVEL_NAME, root);
#endif
}

#endif /* CONFIG_SCST_PROC */

/*
 * Module initialization.
 *
 * Note: since ib_register_client() registers callback functions, and since at
 * least one of these callback functions (srpt_add_one()) calls SCST functions,
 * the SCST target template must be registered before ib_register_client() is
 * called.
 */
static int __init srpt_init_module(void)
{
	int ret;

	ret = -EINVAL;
	if (srp_max_message_size < MIN_MAX_MESSAGE_SIZE) {
		PRINT_ERROR("invalid value %d for kernel module parameter"
			    " srp_max_message_size -- must be at least %d.",
			    srp_max_message_size,
			    MIN_MAX_MESSAGE_SIZE);
		goto out;
	}

	ret = class_register(&srpt_class);
	if (ret) {
		PRINT_ERROR("%s", "couldn't register class infiniband_srpt");
		goto out;
	}

	ret = scst_register_target_template(&srpt_template);
	if (ret < 0) {
		PRINT_ERROR("%s", "couldn't register with scst");
		ret = -ENODEV;
		goto out_unregister_class;
	}

#ifdef CONFIG_SCST_PROC
	ret = srpt_register_procfs_entry(&srpt_template);
	if (ret) {
		PRINT_ERROR("%s", "couldn't register procfs entry");
		goto out_unregister_target;
	}
#endif /* CONFIG_SCST_PROC */

	ret = ib_register_client(&srpt_client);
	if (ret) {
		PRINT_ERROR("%s", "couldn't register IB client");
		goto out_unregister_target;
	}

	if (thread) {
		spin_lock_init(&srpt_thread.thread_lock);
		INIT_LIST_HEAD(&srpt_thread.thread_ioctx_list);
		srpt_thread.thread = kthread_run(srpt_ioctx_thread,
						 NULL, "srpt_thread");
		if (IS_ERR(srpt_thread.thread)) {
			srpt_thread.thread = NULL;
			thread = 0;
		}
	}

	return 0;

out_unregister_target:
#ifdef CONFIG_SCST_PROC
	/*
	 * Note: the procfs entry is unregistered in srpt_release(), which is
	 * called by scst_unregister_target_template().
	 */
#endif /* CONFIG_SCST_PROC */
	scst_unregister_target_template(&srpt_template);
out_unregister_class:
	class_unregister(&srpt_class);
out:
	return ret;
}
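
/*
 * Usage sketch for loading the target driver. The "thread" and
 * "srp_max_message_size" module parameters are assumed to be declared with
 * module_param() elsewhere in this file; the values shown here are
 * illustrative assumptions, not defaults taken from the code:
 *
 *	modprobe ib_srpt thread=1 srp_max_message_size=4096
 */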

static void __exit srpt_cleanup_module(void)
{
	TRACE_ENTRY();

	if (srpt_thread.thread)
		kthread_stop(srpt_thread.thread);
	ib_unregister_client(&srpt_client);
	scst_unregister_target_template(&srpt_template);
	class_unregister(&srpt_class);

	TRACE_EXIT();
}

module_init(srpt_init_module);
module_exit(srpt_cleanup_module);