srpt/src/ib_srpt.c
/*
 * Copyright (c) 2006 - 2009 Mellanox Technology Inc.  All rights reserved.
 * Copyright (C) 2008 Vladislav Bolkhovitin <vst@vlnb.net>
 * Copyright (C) 2008 - 2010 Bart Van Assche <bart.vanassche@gmail.com>
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#include <asm/atomic.h>
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#endif
#include "ib_srpt.h"
#define LOG_PREFIX "ib_srpt" /* Prefix for SCST tracing macros. */
#include "scst_debug.h"

#define CONFIG_SCST_PROC

/* Name of this kernel module. */
#define DRV_NAME                "ib_srpt"
#define DRV_VERSION             "1.0.1"
#define DRV_RELDATE             "July 10, 2008"
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
/* Flags to be used in SCST debug tracing statements. */
#define DEFAULT_SRPT_TRACE_FLAGS (TRACE_OUT_OF_MEM | TRACE_MINOR \
                                  | TRACE_MGMT | TRACE_SPECIAL)
/* Name of the entry that will be created under /proc/scsi_tgt/ib_srpt. */
#define SRPT_PROC_TRACE_LEVEL_NAME      "trace_level"
#endif

#define MELLANOX_SRPT_ID_STRING "SCST SRP target"

MODULE_AUTHOR("Vu Pham");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol target "
                   "v" DRV_VERSION " (" DRV_RELDATE ")");
MODULE_LICENSE("Dual BSD/GPL");

struct srpt_thread {
        /* Protects thread_ioctx_list. */
        spinlock_t thread_lock;
        /* I/O contexts to be processed by the kernel thread. */
        struct list_head thread_ioctx_list;
        /* SRPT kernel thread. */
        struct task_struct *thread;
};

/*
 * Global Variables
 */

static u64 srpt_service_guid;
/* Number of srpt_device structures. */
static atomic_t srpt_device_count;
static int use_port_guid_in_session_name;
static int thread = 1;
static struct srpt_thread srpt_thread;
static DECLARE_WAIT_QUEUE_HEAD(ioctx_list_waitQ);
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
static unsigned long trace_flag = DEFAULT_SRPT_TRACE_FLAGS;
module_param(trace_flag, long, 0644);
MODULE_PARM_DESC(trace_flag,
                 "Trace flags for the ib_srpt kernel module.");
#endif
#if defined(CONFIG_SCST_DEBUG)
static unsigned long interrupt_processing_delay_in_us;
module_param(interrupt_processing_delay_in_us, long, 0744);
MODULE_PARM_DESC(interrupt_processing_delay_in_us,
                 "CQ completion handler interrupt delay in microseconds.");
static unsigned long thread_processing_delay_in_us;
module_param(thread_processing_delay_in_us, long, 0744);
MODULE_PARM_DESC(thread_processing_delay_in_us,
                 "SRP thread processing delay in microseconds.");
#endif

module_param(thread, int, 0444);
MODULE_PARM_DESC(thread,
                 "Execute ioctx processing in kernel thread context. Default"
                 " 1; set to 0 to process ioctx in soft IRQ context where"
                 " possible.");

static unsigned int srp_max_rdma_size = DEFAULT_MAX_RDMA_SIZE;
module_param(srp_max_rdma_size, int, 0744);
MODULE_PARM_DESC(srp_max_rdma_size,
                 "Maximum size of SRP RDMA transfers for new connections.");

static unsigned int srp_max_message_size = DEFAULT_MAX_MESSAGE_SIZE;
module_param(srp_max_message_size, int, 0444);
MODULE_PARM_DESC(srp_max_message_size,
                 "Maximum size of SRP control messages in bytes.");

module_param(use_port_guid_in_session_name, bool, 0444);
MODULE_PARM_DESC(use_port_guid_in_session_name,
                 "Use target port ID in the SCST session name such that"
                 " redundant paths between multiport systems can be masked.");

static void srpt_add_one(struct ib_device *device);
static void srpt_remove_one(struct ib_device *device);
static void srpt_unregister_mad_agent(struct srpt_device *sdev);
#ifdef CONFIG_SCST_PROC
static void srpt_unregister_procfs_entry(struct scst_tgt_template *tgt);
#endif /*CONFIG_SCST_PROC*/
static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
                                    struct srpt_ioctx *ioctx);
static void srpt_release_channel(struct scst_session *scst_sess);

static struct ib_client srpt_client = {
        .name = DRV_NAME,
        .add = srpt_add_one,
        .remove = srpt_remove_one
};

/**
 * Atomically test and set the channel state.
 * @ch: RDMA channel.
 * @old: channel state to compare with.
 * @new: state to change the channel state to if the current state matches the
 *       argument 'old'.
 *
 * Returns the previous channel state.
 */
static enum rdma_ch_state
srpt_test_and_set_channel_state(struct srpt_rdma_ch *ch,
                                enum rdma_ch_state old,
                                enum rdma_ch_state new)
{
        return atomic_cmpxchg(&ch->state, old, new);
}

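/*
 * Illustrative usage sketch (not part of the original driver): because
 * atomic_cmpxchg() returns the old value, only the caller that actually
 * performed a LIVE -> DISCONNECTING transition sees RDMA_CHANNEL_LIVE as
 * the return value, as in the call in srpt_qp_event() below:
 *
 *   if (srpt_test_and_set_channel_state(ch, RDMA_CHANNEL_LIVE,
 *           RDMA_CHANNEL_DISCONNECTING) == RDMA_CHANNEL_LIVE)
 *           ib_send_cm_dreq(ch->cm_id, NULL, 0);
 */
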
/*
 * Callback function called by the InfiniBand core when an asynchronous IB
 * event occurs. This callback may occur in interrupt context. See also
 * section 11.5.2, Set Asynchronous Event Handler in the InfiniBand
 * Architecture Specification.
 */
static void srpt_event_handler(struct ib_event_handler *handler,
                               struct ib_event *event)
{
        struct srpt_device *sdev;
        struct srpt_port *sport;

        TRACE_ENTRY();

        sdev = ib_get_client_data(event->device, &srpt_client);
        if (!sdev || sdev->device != event->device)
                return;

        TRACE_DBG("ASYNC event= %d on device= %s",
                  event->event, sdev->device->name);

        switch (event->event) {
        case IB_EVENT_PORT_ERR:
                if (event->element.port_num <= sdev->device->phys_port_cnt) {
                        sport = &sdev->port[event->element.port_num - 1];
                        sport->lid = 0;
                        sport->sm_lid = 0;
                }
                break;
        case IB_EVENT_PORT_ACTIVE:
        case IB_EVENT_LID_CHANGE:
        case IB_EVENT_PKEY_CHANGE:
        case IB_EVENT_SM_CHANGE:
        case IB_EVENT_CLIENT_REREGISTER:
                /*
                 * Refresh port data asynchronously. Note: it is safe to call
                 * schedule_work() even if &sport->work is already on the
                 * global workqueue because schedule_work() tests for the
                 * work_pending() condition before adding &sport->work to the
                 * global work queue.
                 */
                if (event->element.port_num <= sdev->device->phys_port_cnt) {
                        sport = &sdev->port[event->element.port_num - 1];
                        if (!sport->lid && !sport->sm_lid)
                                schedule_work(&sport->work);
                }
                break;
        default:
                PRINT_ERROR("received unrecognized IB event %d", event->event);
                break;
        }

        TRACE_EXIT();
}

/*
 * Callback function called by the InfiniBand core for SRQ (shared receive
 * queue) events.
 */
static void srpt_srq_event(struct ib_event *event, void *ctx)
{
        TRACE_ENTRY();

        TRACE_DBG("SRQ event %d", event->event);

        TRACE_EXIT();
}

/*
 * Callback function called by the InfiniBand core for QP (queue pair) events.
 */
static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch)
{
        TRACE_DBG("QP event %d on cm_id=%p sess_name=%s state=%d",
                  event->event, ch->cm_id, ch->sess_name,
                  atomic_read(&ch->state));

        switch (event->event) {
        case IB_EVENT_COMM_EST:
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20) || defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
                ib_cm_notify(ch->cm_id, event->event);
#else
                /* Vanilla 2.6.19 kernel (or before) without OFED. */
                PRINT_ERROR("%s", "ib_cm_notify() is not available on"
                            " vanilla 2.6.19 and older kernels");
#endif
                break;
        case IB_EVENT_QP_LAST_WQE_REACHED:
                if (srpt_test_and_set_channel_state(ch, RDMA_CHANNEL_LIVE,
                        RDMA_CHANNEL_DISCONNECTING) == RDMA_CHANNEL_LIVE) {
                        PRINT_INFO("disconnected session %s.", ch->sess_name);
                        ib_send_cm_dreq(ch->cm_id, NULL, 0);
                }
                break;
        default:
                PRINT_ERROR("received unrecognized IB QP event %d",
                            event->event);
                break;
        }
}

/*
 * Helper function for filling in an InfiniBand IOUnitInfo structure. Stores
 * the lowest four bits of 'value' in element 'slot' of the array of four-bit
 * elements called c_list (controller list). The index 'slot' is one-based.
 *
 * @pre 1 <= slot && 0 <= value && value < 16
 */
static void srpt_set_ioc(u8 *c_list, u32 slot, u8 value)
{
        u16 id;
        u8 tmp;

        id = (slot - 1) / 2;
        if (slot & 0x1) {
                tmp = c_list[id] & 0xf;
                c_list[id] = (value << 4) | tmp;
        } else {
                tmp = c_list[id] & 0xf0;
                c_list[id] = (value & 0xf) | tmp;
        }
}

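/*
 * Worked example (illustrative only): controller slots are packed two per
 * byte, odd slots in the high nibble and even slots in the low nibble:
 *
 *   u8 c_list[8] = { 0 };
 *   srpt_set_ioc(c_list, 1, 1);    // c_list[0] == 0x10
 *   srpt_set_ioc(c_list, 2, 5);    // c_list[0] == 0x15
 *   srpt_set_ioc(c_list, 3, 0xf);  // c_list[1] == 0xf0
 */
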
/*
 * Write InfiniBand ClassPortInfo to mad. See also section 16.3.3.1
 * ClassPortInfo in the InfiniBand Architecture Specification.
 */
static void srpt_get_class_port_info(struct ib_dm_mad *mad)
{
        struct ib_class_port_info *cif;

        cif = (struct ib_class_port_info *)mad->data;
        memset(cif, 0, sizeof *cif);
        cif->base_version = 1;
        cif->class_version = 1;
        cif->resp_time_value = 20;

        mad->mad_hdr.status = 0;
}

/*
 * Write IOUnitInfo to mad. See also section 16.3.3.3 IOUnitInfo in the
 * InfiniBand Architecture Specification. See also section B.7,
 * table B.6 in the T10 SRP r16a document.
 */
static void srpt_get_iou(struct ib_dm_mad *mad)
{
        struct ib_dm_iou_info *ioui;
        u8 slot;
        int i;

        ioui = (struct ib_dm_iou_info *)mad->data;
        ioui->change_id = 1;
        ioui->max_controllers = 16;

        /* set present for slot 1 and empty for the rest */
        srpt_set_ioc(ioui->controller_list, 1, 1);
        for (i = 1, slot = 2; i < 16; i++, slot++)
                srpt_set_ioc(ioui->controller_list, slot, 0);

        mad->mad_hdr.status = 0;
}

/*
 * Write IOControllerProfile to mad for I/O controller (sdev, slot). See also
 * section 16.3.3.4 IOControllerProfile in the InfiniBand Architecture
 * Specification. See also section B.7, table B.7 in the T10 SRP r16a
 * document.
 */
static void srpt_get_ioc(struct srpt_device *sdev, u32 slot,
                         struct ib_dm_mad *mad)
{
        struct ib_dm_ioc_profile *iocp;

        iocp = (struct ib_dm_ioc_profile *)mad->data;

        if (!slot || slot > 16) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
                return;
        }

        if (slot > 2) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
                return;
        }

        memset(iocp, 0, sizeof *iocp);
        strcpy(iocp->id_string, MELLANOX_SRPT_ID_STRING);
        iocp->guid = cpu_to_be64(srpt_service_guid);
        iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
        iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id);
        iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver);
        iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
        iocp->subsys_device_id = 0x0;
        iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
        iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
        iocp->protocol = cpu_to_be16(SRP_PROTOCOL);
        iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION);
        iocp->send_queue_depth = cpu_to_be16(SRPT_SRQ_SIZE);
        iocp->rdma_read_depth = 4;
        iocp->send_size = cpu_to_be32(srp_max_message_size);
        iocp->rdma_size = cpu_to_be32(min(max(srp_max_rdma_size, 256U),
                                          1U << 24));
        iocp->num_svc_entries = 1;
        iocp->op_cap_mask = SRP_SEND_TO_IOC | SRP_SEND_FROM_IOC |
                SRP_RDMA_READ_FROM_IOC | SRP_RDMA_WRITE_FROM_IOC;

        mad->mad_hdr.status = 0;
}

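/*
 * Note on the rdma_size value advertised above: the srp_max_rdma_size
 * module parameter is clamped to the range [256, 2^24]. For example, a
 * value of 64 is advertised as 256 and a value of 2^25 as 2^24 (16 MB).
 */
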
/*
 * Device management: write ServiceEntries to mad for the given slot. See also
 * section 16.3.3.5 ServiceEntries in the InfiniBand Architecture
 * Specification. See also section B.7, table B.8 in the T10 SRP r16a document.
 */
static void srpt_get_svc_entries(u64 ioc_guid,
                                 u16 slot, u8 hi, u8 lo, struct ib_dm_mad *mad)
{
        struct ib_dm_svc_entries *svc_entries;

        WARN_ON(!ioc_guid);

        if (!slot || slot > 16) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
                return;
        }

        if (slot > 2 || lo > hi || hi > 1) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
                return;
        }

        svc_entries = (struct ib_dm_svc_entries *)mad->data;
        memset(svc_entries, 0, sizeof *svc_entries);
        svc_entries->service_entries[0].id = cpu_to_be64(ioc_guid);
        snprintf(svc_entries->service_entries[0].name,
                 sizeof(svc_entries->service_entries[0].name),
                 "%s%016llx",
                 SRP_SERVICE_NAME_PREFIX,
                 (unsigned long long)ioc_guid);

        mad->mad_hdr.status = 0;
}

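/*
 * Example of the resulting service name (assuming the conventional
 * "SRP.T10:" value of SRP_SERVICE_NAME_PREFIX): for an ioc_guid of
 * 0x0002c9030000071d the name becomes "SRP.T10:0002c9030000071d".
 */
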
/*
 * Actual processing of a received MAD *rq_mad received through source port *sp
 * (MAD = InfiniBand management datagram). The response to be sent back is
 * written to *rsp_mad.
 */
static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
                                 struct ib_dm_mad *rsp_mad)
{
        u16 attr_id;
        u32 slot;
        u8 hi, lo;

        attr_id = be16_to_cpu(rq_mad->mad_hdr.attr_id);
        switch (attr_id) {
        case DM_ATTR_CLASS_PORT_INFO:
                srpt_get_class_port_info(rsp_mad);
                break;
        case DM_ATTR_IOU_INFO:
                srpt_get_iou(rsp_mad);
                break;
        case DM_ATTR_IOC_PROFILE:
                slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
                srpt_get_ioc(sp->sdev, slot, rsp_mad);
                break;
        case DM_ATTR_SVC_ENTRIES:
                slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
                hi = (u8) ((slot >> 8) & 0xff);
                lo = (u8) (slot & 0xff);
                slot = (u16) ((slot >> 16) & 0xffff);
                srpt_get_svc_entries(srpt_service_guid,
                                     slot, hi, lo, rsp_mad);
                break;
        default:
                rsp_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
                break;
        }
}

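/*
 * Worked example for the DM_ATTR_SVC_ENTRIES decoding above: an attr_mod
 * value of 0x00010100 yields slot = 1 (bits 16..31), hi = 1 (bits 8..15)
 * and lo = 0 (bits 0..7), i.e. a request for service entries 0..1 of I/O
 * controller slot 1.
 */
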
/*
 * Callback function that is called by the InfiniBand core after transmission of
 * a MAD. (MAD = management datagram; AH = address handle.)
 */
static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
                                  struct ib_mad_send_wc *mad_wc)
{
        ib_destroy_ah(mad_wc->send_buf->ah);
        ib_free_send_mad(mad_wc->send_buf);
}

/*
 * Callback function that is called by the InfiniBand core after reception of
 * a MAD (management datagram).
 */
static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
                                  struct ib_mad_recv_wc *mad_wc)
{
        struct srpt_port *sport = (struct srpt_port *)mad_agent->context;
        struct ib_ah *ah;
        struct ib_mad_send_buf *rsp;
        struct ib_dm_mad *dm_mad;

        if (!mad_wc || !mad_wc->recv_buf.mad)
                return;

        ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc,
                                  mad_wc->recv_buf.grh, mad_agent->port_num);
        if (IS_ERR(ah))
                goto err;

        BUILD_BUG_ON(offsetof(struct ib_dm_mad, data) != IB_MGMT_DEVICE_HDR);

        rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp,
                                 mad_wc->wc->pkey_index, 0,
                                 IB_MGMT_DEVICE_HDR, IB_MGMT_DEVICE_DATA,
                                 GFP_KERNEL);
        if (IS_ERR(rsp))
                goto err_rsp;

        rsp->ah = ah;

        dm_mad = rsp->mad;
        memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof *dm_mad);
        dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
        dm_mad->mad_hdr.status = 0;

        switch (mad_wc->recv_buf.mad->mad_hdr.method) {
        case IB_MGMT_METHOD_GET:
                srpt_mgmt_method_get(sport, mad_wc->recv_buf.mad, dm_mad);
                break;
        case IB_MGMT_METHOD_SET:
                dm_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
                break;
        default:
                dm_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD);
                break;
        }

        if (!ib_post_send_mad(rsp, NULL)) {
                ib_free_recv_mad(mad_wc);
                /* will destroy_ah & free_send_mad in send completion */
                return;
        }

        ib_free_send_mad(rsp);

err_rsp:
        ib_destroy_ah(ah);
err:
        ib_free_recv_mad(mad_wc);
}

/*
 * Enable InfiniBand management datagram processing, update the cached sm_lid,
 * lid and gid values, and register a callback function for processing MADs
 * on the specified port. It is safe to call this function more than once for
 * the same port.
 */
static int srpt_refresh_port(struct srpt_port *sport)
{
        struct ib_mad_reg_req reg_req;
        struct ib_port_modify port_modify;
        struct ib_port_attr port_attr;
        int ret;

        TRACE_ENTRY();

        memset(&port_modify, 0, sizeof port_modify);
        port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
        port_modify.clr_port_cap_mask = 0;

        ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
        if (ret)
                goto err_mod_port;

        ret = ib_query_port(sport->sdev->device, sport->port, &port_attr);
        if (ret)
                goto err_query_port;

        sport->sm_lid = port_attr.sm_lid;
        sport->lid = port_attr.lid;

        ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid);
        if (ret)
                goto err_query_port;

        if (!sport->mad_agent) {
                memset(&reg_req, 0, sizeof reg_req);
                reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
                reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION;
                set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
                set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);

                sport->mad_agent = ib_register_mad_agent(sport->sdev->device,
                                                         sport->port,
                                                         IB_QPT_GSI,
                                                         &reg_req, 0,
                                                         srpt_mad_send_handler,
                                                         srpt_mad_recv_handler,
                                                         sport);
                if (IS_ERR(sport->mad_agent)) {
                        ret = PTR_ERR(sport->mad_agent);
                        sport->mad_agent = NULL;
                        goto err_query_port;
                }
        }

        TRACE_EXIT_RES(0);

        return 0;

err_query_port:

        port_modify.set_port_cap_mask = 0;
        port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
        ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);

err_mod_port:

        TRACE_EXIT_RES(ret);

        return ret;
}

/*
 * Unregister the callback function for processing MADs and disable MAD
 * processing for all ports of the specified device. It is safe to call this
 * function more than once for the same device.
 */
static void srpt_unregister_mad_agent(struct srpt_device *sdev)
{
        struct ib_port_modify port_modify = {
                .clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP,
        };
        struct srpt_port *sport;
        int i;

        for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
                sport = &sdev->port[i - 1];
                WARN_ON(sport->port != i);
                if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0)
                        PRINT_ERROR("%s", "disabling MAD processing failed.");
                if (sport->mad_agent) {
                        ib_unregister_mad_agent(sport->mad_agent);
                        sport->mad_agent = NULL;
                }
        }
}

/**
 * Allocate and initialize an SRPT I/O context structure.
 */
static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev)
{
        struct srpt_ioctx *ioctx;

        ioctx = kmalloc(sizeof *ioctx, GFP_KERNEL);
        if (!ioctx)
                goto out;

        ioctx->buf = kzalloc(srp_max_message_size, GFP_KERNEL);
        if (!ioctx->buf)
                goto out_free_ioctx;

        ioctx->dma = ib_dma_map_single(sdev->device, ioctx->buf,
                                       srp_max_message_size, DMA_BIDIRECTIONAL);
        if (ib_dma_mapping_error(sdev->device, ioctx->dma))
                goto out_free_buf;

        return ioctx;

out_free_buf:
        kfree(ioctx->buf);
out_free_ioctx:
        kfree(ioctx);
out:
        return NULL;
}

/*
 * Deallocate an SRPT I/O context structure.
 */
static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
{
        if (!ioctx)
                return;

        ib_dma_unmap_single(sdev->device, ioctx->dma,
                            srp_max_message_size, DMA_BIDIRECTIONAL);
        kfree(ioctx->buf);
        kfree(ioctx);
}

/**
 * srpt_alloc_ioctx_ring() -- allocate a ring of SRPT I/O context structures.
 * @sdev: device to allocate the I/O context ring for.
 * @ioctx_ring: pointer to an array of I/O contexts.
 * @ring_size: number of elements in the I/O context ring.
 * @flags: flags to be set in the ring index.
 */
static int srpt_alloc_ioctx_ring(struct srpt_device *sdev,
                                 struct srpt_ioctx **ioctx_ring,
                                 int ring_size,
                                 u32 flags)
{
        int res;
        int i;

        TRACE_ENTRY();

        res = -ENOMEM;
        for (i = 0; i < ring_size; ++i) {
                ioctx_ring[i] = srpt_alloc_ioctx(sdev);

                if (!ioctx_ring[i])
                        goto err;

                WARN_ON(i & flags);
                ioctx_ring[i]->index = i | flags;
        }
        res = 0;
        goto out;

err:
        while (--i >= 0) {
                srpt_free_ioctx(sdev, ioctx_ring[i]);
                ioctx_ring[i] = NULL;
        }
out:
        TRACE_EXIT_RES(res);
        return res;
}

/* Free the ring of SRPT I/O context structures. */
static void srpt_free_ioctx_ring(struct srpt_device *sdev,
                                 struct srpt_ioctx **ioctx_ring,
                                 int ring_size)
{
        int i;

        for (i = 0; i < ring_size; ++i) {
                srpt_free_ioctx(sdev, ioctx_ring[i]);
                ioctx_ring[i] = NULL;
        }
}

/**
 * Set the state of a command.
 * @new: New state to be set.
 *
 * Does not modify the state of aborted commands. Returns the previous command
 * state.
 */
static enum srpt_command_state srpt_set_cmd_state(struct srpt_ioctx *ioctx,
                                                  enum srpt_command_state new)
{
        enum srpt_command_state previous;

        WARN_ON(!ioctx);
        WARN_ON(new == SRPT_STATE_NEW);

        do {
                previous = atomic_read(&ioctx->state);
        } while (previous != SRPT_STATE_ABORTED
               && atomic_cmpxchg(&ioctx->state, previous, new) != previous);

        return previous;
}

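/*
 * Illustrative behavior (not part of the original driver): the loop above
 * never overwrites SRPT_STATE_ABORTED, so after
 *
 *   srpt_set_cmd_state(ioctx, SRPT_STATE_ABORTED);
 *   previous = srpt_set_cmd_state(ioctx, SRPT_STATE_PROCESSED);
 *
 * 'previous' equals SRPT_STATE_ABORTED and the command stays aborted.
 */
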
/**
 * Test and set the state of a command.
 * @old: State to compare against.
 * @new: New state to be set if the current state matches 'old'.
 *
 * Returns the previous command state.
 */
static enum srpt_command_state
srpt_test_and_set_cmd_state(struct srpt_ioctx *ioctx,
                            enum srpt_command_state old,
                            enum srpt_command_state new)
{
        WARN_ON(!ioctx);
        WARN_ON(old == SRPT_STATE_ABORTED);
        WARN_ON(new == SRPT_STATE_NEW);

        return atomic_cmpxchg(&ioctx->state, old, new);
}

/**
 * Post a receive request on the work queue of InfiniBand device 'sdev'.
 */
static int srpt_post_recv(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
{
        struct ib_sge list;
        struct ib_recv_wr wr, *bad_wr;

        wr.wr_id = ioctx->index | SRPT_OP_RECV;

        list.addr = ioctx->dma;
        list.length = srp_max_message_size;
        list.lkey = sdev->mr->lkey;

        wr.next = NULL;
        wr.sg_list = &list;
        wr.num_sge = 1;

        return ib_post_srq_recv(sdev->srq, &wr, &bad_wr);
}

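/*
 * Note: receive work requests are tagged with SRPT_OP_RECV in wr_id above
 * so that the completion handlers can tell receive completions apart from
 * send completions, e.g. as in srpt_handle_err_comp() further down:
 *
 *   if (wc->wr_id & SRPT_OP_RECV)
 *           ioctx = sdev->ioctx_ring[wc->wr_id & ~SRPT_OP_RECV];
 */
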
/**
 * Post an IB send request.
 * @ch: RDMA channel to post the send request on.
 * @ioctx: I/O context of the send request.
 * @len: length of the request to be sent in bytes.
 *
 * Returns zero upon success and a non-zero value upon failure.
 */
static int srpt_post_send(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
                          int len)
{
        struct ib_sge list;
        struct ib_send_wr wr, *bad_wr;
        struct srpt_device *sdev = ch->sport->sdev;
        int ret;

        ret = -ENOMEM;
        if (atomic_dec_return(&ch->qp_wr_avail) < 0) {
                PRINT_ERROR("%s[%d]: send queue full", __func__, __LINE__);
                goto out;
        }

        ib_dma_sync_single_for_device(sdev->device, ioctx->dma,
                                      len, DMA_TO_DEVICE);

        list.addr = ioctx->dma;
        list.length = len;
        list.lkey = sdev->mr->lkey;

        wr.next = NULL;
        wr.wr_id = ioctx->index;
        wr.sg_list = &list;
        wr.num_sge = 1;
        wr.opcode = IB_WR_SEND;
        wr.send_flags = IB_SEND_SIGNALED;

        ret = ib_post_send(ch->qp, &wr, &bad_wr);

out:
        if (ret < 0)
                atomic_inc(&ch->qp_wr_avail);
        return ret;
}

/**
 * srpt_get_desc_tbl() - Parse the data descriptors of an SRP_CMD request.
 * @ioctx: Pointer to the I/O context associated with the request.
 * @srp_cmd: Pointer to the SRP_CMD request data.
 * @dir: Pointer to the variable to which the transfer direction will be
 *   written.
 * @data_len: Pointer to the variable to which the total data length of all
 *   descriptors in the SRP_CMD request will be written.
 *
 * This function initializes ioctx->n_rbuf and ioctx->rbufs.
 *
 * Returns -EINVAL when the SRP_CMD request contains inconsistent descriptors;
 * -ENOMEM when memory allocation fails and zero upon success.
 */
static int srpt_get_desc_tbl(struct srpt_ioctx *ioctx, struct srp_cmd *srp_cmd,
                             scst_data_direction *dir, u64 *data_len)
{
        struct srp_indirect_buf *idb;
        struct srp_direct_buf *db;
        unsigned add_cdb_offset;
        int ret;

        /*
         * The pointer computations below will only be compiled correctly
         * if srp_cmd::add_data is declared as s8*, u8*, s8[] or u8[], so check
         * whether srp_cmd::add_data has been declared as a byte pointer.
         */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 31)
        BUILD_BUG_ON(!__same_type(srp_cmd->add_data[0], (s8)0)
                     && !__same_type(srp_cmd->add_data[0], (u8)0));
#else
        /* Note: the __same_type() macro has been introduced in kernel 2.6.31.*/
#endif

        BUG_ON(!dir);
        BUG_ON(!data_len);

        ret = 0;
        *data_len = 0;

        /*
         * The lower four bits of the buffer format field contain the DATA-IN
         * buffer descriptor format, and the highest four bits contain the
         * DATA-OUT buffer descriptor format.
         */
        *dir = SCST_DATA_NONE;
        if (srp_cmd->buf_fmt & 0xf)
                /* DATA-IN: transfer data from target to initiator. */
                *dir = SCST_DATA_READ;
        else if (srp_cmd->buf_fmt >> 4)
                /* DATA-OUT: transfer data from initiator to target. */
                *dir = SCST_DATA_WRITE;

        /*
         * According to the SRP spec, the lower two bits of the 'ADDITIONAL
         * CDB LENGTH' field are reserved and the size in bytes of this field
         * is four times the value specified in bits 3..7. Hence the "& ~3".
         */
        add_cdb_offset = srp_cmd->add_cdb_len & ~3;
        if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) ||
            ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) {
                ioctx->n_rbuf = 1;
                ioctx->rbufs = &ioctx->single_rbuf;

                db = (struct srp_direct_buf *)(srp_cmd->add_data
                                               + add_cdb_offset);
                memcpy(ioctx->rbufs, db, sizeof *db);
                *data_len = be32_to_cpu(db->len);
        } else if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_INDIRECT) ||
                   ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_INDIRECT)) {
                idb = (struct srp_indirect_buf *)(srp_cmd->add_data
                                                  + add_cdb_offset);

                ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof *db;

                if (ioctx->n_rbuf >
                    (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
                        PRINT_ERROR("received corrupt SRP_CMD request"
                                    " (%u out + %u in != %u / %zu)",
                                    srp_cmd->data_out_desc_cnt,
                                    srp_cmd->data_in_desc_cnt,
                                    be32_to_cpu(idb->table_desc.len),
                                    sizeof(*db));
                        ioctx->n_rbuf = 0;
                        ret = -EINVAL;
                        goto out;
                }

                if (ioctx->n_rbuf == 1)
                        ioctx->rbufs = &ioctx->single_rbuf;
                else {
                        ioctx->rbufs =
                                kmalloc(ioctx->n_rbuf * sizeof *db, GFP_ATOMIC);
                        if (!ioctx->rbufs) {
                                ioctx->n_rbuf = 0;
                                ret = -ENOMEM;
                                goto out;
                        }
                }

                db = idb->desc_list;
                memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof *db);
                *data_len = be32_to_cpu(idb->len);
        }
out:
        return ret;
}

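/*
 * Worked example for the buf_fmt decoding above (illustrative): a WRITE
 * command carrying a single direct descriptor has buf_fmt 0x10 (DATA-OUT
 * nibble == SRP_DATA_DESC_DIRECT), so *dir becomes SCST_DATA_WRITE,
 * n_rbuf becomes 1 and *data_len is taken from the direct descriptor. A
 * READ command using an indirect table instead carries
 * SRP_DATA_DESC_INDIRECT in the DATA-IN (lowest) nibble.
 */
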
/*
 * Modify the attributes of queue pair 'qp': allow local write, remote read,
 * and remote write. Also transition 'qp' to state IB_QPS_INIT.
 */
static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
{
        struct ib_qp_attr *attr;
        int ret;

        attr = kzalloc(sizeof *attr, GFP_KERNEL);
        if (!attr)
                return -ENOMEM;

        attr->qp_state = IB_QPS_INIT;
        attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
            IB_ACCESS_REMOTE_WRITE;
        attr->port_num = ch->sport->port;
        attr->pkey_index = 0;

        ret = ib_modify_qp(qp, attr,
                           IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PORT |
                           IB_QP_PKEY_INDEX);

        kfree(attr);
        return ret;
}

/**
 * Change the state of a channel to 'ready to receive' (RTR).
 * @ch: channel of the queue pair.
 * @qp: queue pair to change the state of.
 *
 * Returns zero upon success and a negative value upon failure.
 *
 * Note: currently a struct ib_qp_attr takes 136 bytes on a 64-bit system.
 * If this structure ever becomes larger, it might be necessary to allocate
 * it dynamically instead of on the stack.
 */
static int srpt_ch_qp_rtr(struct srpt_rdma_ch *ch, struct ib_qp *qp)
{
        struct ib_qp_attr qp_attr;
        int attr_mask;
        int ret;

        qp_attr.qp_state = IB_QPS_RTR;
        ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask);
        if (ret)
                goto out;

        qp_attr.max_dest_rd_atomic = 4;

        ret = ib_modify_qp(qp, &qp_attr, attr_mask);

out:
        return ret;
}

/**
 * Change the state of a channel to 'ready to send' (RTS).
 * @ch: channel of the queue pair.
 * @qp: queue pair to change the state of.
 *
 * Returns zero upon success and a negative value upon failure.
 *
 * Note: currently a struct ib_qp_attr takes 136 bytes on a 64-bit system.
 * If this structure ever becomes larger, it might be necessary to allocate
 * it dynamically instead of on the stack.
 */
static int srpt_ch_qp_rts(struct srpt_rdma_ch *ch, struct ib_qp *qp)
{
        struct ib_qp_attr qp_attr;
        int attr_mask;
        int ret;

        qp_attr.qp_state = IB_QPS_RTS;
        ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask);
        if (ret)
                goto out;

        qp_attr.max_rd_atomic = 4;

        ret = ib_modify_qp(qp, &qp_attr, attr_mask);

out:
        return ret;
}

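/*
 * Note (sketch, assuming the usual IB CM handshake; the CM callback that
 * drives this lives outside this fragment): on the target side the queue
 * pair typically traverses RESET -> INIT -> RTR -> RTS, i.e.
 *
 *   srpt_init_ch_qp(ch, qp);  // after receiving a CM REQ
 *   srpt_ch_qp_rtr(ch, qp);   // before sending the CM REP
 *   srpt_ch_qp_rts(ch, qp);   // after receiving the CM RTU
 */
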
/**
 * srpt_req_lim_delta() - Compute by how much req_lim changed since the
 * last time this function has been called. This value is necessary for
 * filling in the REQUEST LIMIT DELTA field of an SRP_RSP response.
 *
 * Side Effect: Modifies ch->last_response_req_lim.
 */
static int srpt_req_lim_delta(struct srpt_rdma_ch *ch)
{
        int req_lim;
        int last_rsp_req_lim;

        req_lim = atomic_read(&ch->req_lim);
        last_rsp_req_lim = atomic_xchg(&ch->last_response_req_lim, req_lim);
        return req_lim - last_rsp_req_lim;
}

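/*
 * Worked example: if ch->req_lim is 10 while ch->last_response_req_lim
 * still holds 8 from the previous response, srpt_req_lim_delta() returns
 * 2 and updates last_response_req_lim to 10, so the next SRP_RSP grants
 * the initiator two additional request credits.
 */
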
static void srpt_reset_ioctx(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
{
        WARN_ON(!ch);
        if (!ch)
                return;

        srpt_unmap_sg_to_ib_sge(ch, ioctx);

        if (ioctx->n_rbuf > 1) {
                kfree(ioctx->rbufs);
                ioctx->rbufs = NULL;
        }

        if (srpt_post_recv(ch->sport->sdev, ioctx)) {
                /* To do: queue ioctx back on a list of free I/O contexts. */
                PRINT_ERROR("%s", "SRQ post_recv failed - this is serious.");
        } else {
                int req_lim;

                req_lim = atomic_inc_return(&ch->req_lim);
                if (req_lim < 0 || req_lim > SRPT_RQ_SIZE)
                        PRINT_ERROR("internal error: req_lim = %d out of range"
                                    " %d .. %d", req_lim, 0, SRPT_RQ_SIZE);
        }
}

/**
 * Abort a command.
 */
static void srpt_abort_scst_cmd(struct srpt_device *sdev,
                                struct scst_cmd *scmnd)
{
        struct srpt_ioctx *ioctx;
        scst_data_direction dir;
        enum srpt_command_state previous_state;

        TRACE_ENTRY();

        ioctx = scst_cmd_get_tgt_priv(scmnd);
        BUG_ON(!ioctx);

        previous_state = srpt_set_cmd_state(ioctx, SRPT_STATE_ABORTED);
        if (previous_state == SRPT_STATE_ABORTED)
                goto out;

        TRACE_DBG("Aborting cmd with state %d and tag %lld",
                  previous_state, scst_cmd_get_tag(scmnd));

        dir = scst_cmd_get_data_direction(scmnd);
        if (dir != SCST_DATA_NONE && scst_cmd_get_sg(scmnd))
                ib_dma_unmap_sg(sdev->device,
                                scst_cmd_get_sg(scmnd),
                                scst_cmd_get_sg_cnt(scmnd),
                                scst_to_tgt_dma_dir(dir));

        switch (previous_state) {
        case SRPT_STATE_NEW:
                scst_set_delivery_status(scmnd, SCST_CMD_DELIVERY_ABORTED);
                break;
        case SRPT_STATE_NEED_DATA:
                WARN_ON(scst_cmd_get_data_direction(ioctx->scmnd)
                        == SCST_DATA_READ);
                scst_rx_data(scmnd, SCST_RX_STATUS_ERROR,
                             scst_estimate_context());
                break;
        case SRPT_STATE_DATA_IN:
        case SRPT_STATE_PROCESSED:
                scst_set_delivery_status(scmnd, SCST_CMD_DELIVERY_ABORTED);
                break;
        default:
                TRACE_DBG("Aborting cmd with state %d", previous_state);
                WARN_ON("ERROR: unexpected command state");
        }
        scst_tgt_cmd_done(scmnd, scst_estimate_context());

out:
        TRACE_EXIT();
}

static void srpt_handle_err_comp(struct srpt_rdma_ch *ch, struct ib_wc *wc)
{
        struct srpt_ioctx *ioctx;
        struct srpt_device *sdev = ch->sport->sdev;

        if (wc->wr_id & SRPT_OP_RECV) {
                ioctx = sdev->ioctx_ring[wc->wr_id & ~SRPT_OP_RECV];
                PRINT_ERROR("%s", "This is serious - SRQ is in bad state.");
        } else {
                ioctx = sdev->ioctx_ring[wc->wr_id];

                if (ioctx->scmnd)
                        srpt_abort_scst_cmd(sdev, ioctx->scmnd);
                else
                        srpt_reset_ioctx(ch, ioctx);
        }
}

/** Process an IB send completion notification. */
static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
                                  struct srpt_ioctx *ioctx,
                                  enum scst_exec_context context)
{
        if (ioctx->scmnd) {
                scst_data_direction dir =
                        scst_cmd_get_data_direction(ioctx->scmnd);

                if (dir != SCST_DATA_NONE && scst_cmd_get_sg(ioctx->scmnd))
                        ib_dma_unmap_sg(ch->sport->sdev->device,
                                        scst_cmd_get_sg(ioctx->scmnd),
                                        scst_cmd_get_sg_cnt(ioctx->scmnd),
                                        scst_to_tgt_dma_dir(dir));
                scst_tgt_cmd_done(ioctx->scmnd, context);
        } else
                srpt_reset_ioctx(ch, ioctx);
}

/** Process an IB RDMA completion notification. */
static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
                                  struct srpt_ioctx *ioctx)
{
        if (!ioctx->scmnd) {
                WARN_ON("ERROR: ioctx->scmnd == NULL");
                srpt_reset_ioctx(ch, ioctx);
                return;
        }

        /*
         * If an RDMA completion notification has been received for a write
         * command, tell SCST that processing can continue by calling
         * scst_rx_data().
         */
        if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
                                SRPT_STATE_DATA_IN) == SRPT_STATE_NEED_DATA) {
                WARN_ON(scst_cmd_get_data_direction(ioctx->scmnd)
                        == SCST_DATA_READ);
                scst_rx_data(ioctx->scmnd, SCST_RX_STATUS_SUCCESS,
                             scst_estimate_context());
        }
}

/**
 * srpt_build_cmd_rsp() - Build an SRP_RSP response.
 * @ch: RDMA channel through which the request has been received.
 * @ioctx: I/O context associated with the SRP_CMD request. The response will
 *   be built in the buffer ioctx->buf points at and hence this function will
 *   overwrite the request data.
 * @req_lim_delta: value for the REQUEST LIMIT DELTA field of the response.
 * @tag: tag of the request for which this response is being generated.
 * @status: value for the STATUS field of the SRP_RSP information unit.
 * @sense_data: pointer to sense data to be included in the response.
 * @sense_data_len: length in bytes of the sense data.
 *
 * Returns the size in bytes of the SRP_RSP response.
 *
 * An SRP_RSP response contains a SCSI status or service response. See also
 * section 6.9 in the T10 SRP r16a document for the format of an SRP_RSP
 * response. See also SPC-2 for more information about sense data.
 */
static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
                              struct srpt_ioctx *ioctx, s32 req_lim_delta,
                              u64 tag, int status,
                              const u8 *sense_data, int sense_data_len)
{
        struct srp_rsp *srp_rsp;
        int max_sense_len;

        /*
         * The lowest bit of all SAM-3 status codes is zero (see also
         * paragraph 5.3 in SAM-3).
         */
        WARN_ON(status & 1);

        srp_rsp = ioctx->buf;
        BUG_ON(!srp_rsp);
        memset(srp_rsp, 0, sizeof *srp_rsp);

        srp_rsp->opcode = SRP_RSP;
        /*
         * Copy the SCSOLNT or UCSOLNT bit from the request to the SOLNT bit
         * of the response.
         */
        srp_rsp->sol_not
                = (ioctx->sol_not
                   & (status == SAM_STAT_GOOD ? SRP_SCSOLNT : SRP_UCSOLNT))
                ? SRP_SOLNT : 0;
        srp_rsp->req_lim_delta = cpu_to_be32(req_lim_delta);
        srp_rsp->tag = tag;

        if (SCST_SENSE_VALID(sense_data)) {
                BUILD_BUG_ON(MIN_MAX_MESSAGE_SIZE <= sizeof(*srp_rsp));
                max_sense_len = ch->max_ti_iu_len - sizeof(*srp_rsp);
                if (sense_data_len > max_sense_len) {
                        PRINT_WARNING("truncated sense data from %d to %d"
                                " bytes", sense_data_len,
                                max_sense_len);
                        sense_data_len = max_sense_len;
                }

                srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
                srp_rsp->status = status;
                srp_rsp->sense_data_len = cpu_to_be32(sense_data_len);
                memcpy(srp_rsp + 1, sense_data, sense_data_len);
        } else
                sense_data_len = 0;

        return sizeof(*srp_rsp) + sense_data_len;
}

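/*
 * Worked example for the sense data truncation above (sizes are
 * illustrative): with a negotiated ch->max_ti_iu_len of 68 bytes and a
 * 36-byte struct srp_rsp, max_sense_len is 32, so 96 bytes of sense data
 * would be truncated to 32 bytes and a warning logged.
 */
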
/**
 * Build a task management response, which is a specific SRP_RSP response.
 * @ch: RDMA channel through which the request has been received.
 * @ioctx: I/O context in which the SRP_RSP response will be built.
 * @req_lim_delta: value for the REQUEST LIMIT DELTA field of the response.
 * @rsp_code: RSP_CODE that will be stored in the response.
 * @tag: tag of the request for which this response is being generated.
 *
 * Returns the size in bytes of the SRP_RSP response.
 *
 * An SRP_RSP response contains a SCSI status or service response. See also
 * section 6.9 in the T10 SRP r16a document for the format of an SRP_RSP
 * response.
 */
static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
                                  struct srpt_ioctx *ioctx, s32 req_lim_delta,
                                  u8 rsp_code, u64 tag)
{
        struct srp_rsp *srp_rsp;
        int resp_data_len;
        int resp_len;

        resp_data_len = (rsp_code == SRP_TSK_MGMT_SUCCESS) ? 0 : 4;
        resp_len = sizeof(*srp_rsp) + resp_data_len;

        srp_rsp = ioctx->buf;
        memset(srp_rsp, 0, sizeof *srp_rsp);

        srp_rsp->opcode = SRP_RSP;
        /*
         * Copy the SCSOLNT or UCSOLNT bit from the request to the SOLNT bit
         * of the response.
         */
        srp_rsp->sol_not
                = (ioctx->sol_not
                   & (rsp_code == SRP_TSK_MGMT_SUCCESS
                      ? SRP_SCSOLNT : SRP_UCSOLNT))
                ? SRP_SOLNT : 0;
        srp_rsp->req_lim_delta = cpu_to_be32(req_lim_delta);
        srp_rsp->tag = tag;

        if (rsp_code != SRP_TSK_MGMT_SUCCESS) {
                srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID;
                srp_rsp->resp_data_len = cpu_to_be32(resp_data_len);
                srp_rsp->data[3] = rsp_code;
        }

        return resp_len;
}

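/*
 * Worked example: on failure (rsp_code != SRP_TSK_MGMT_SUCCESS) the
 * response carries four bytes of response data with the RSP_CODE in the
 * fourth byte (srp_rsp->data[3]), so the returned length equals
 * sizeof(struct srp_rsp) + 4; on success no response data is attached and
 * the length is just sizeof(struct srp_rsp).
 */
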
/*
 * Process SRP_CMD.
 */
static int srpt_handle_cmd(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
{
        struct scst_cmd *scmnd;
        struct srp_cmd *srp_cmd;
        scst_data_direction dir;
        u64 data_len;
        int ret;

        srp_cmd = ioctx->buf;

        scmnd = scst_rx_cmd(ch->scst_sess, (u8 *) &srp_cmd->lun,
                            sizeof srp_cmd->lun, srp_cmd->cdb, 16,
                            thread ? SCST_NON_ATOMIC : SCST_ATOMIC);
        if (!scmnd)
                goto err;

        ioctx->scmnd = scmnd;

        ret = srpt_get_desc_tbl(ioctx, srp_cmd, &dir, &data_len);
        if (ret) {
                scst_set_cmd_error(scmnd,
                        SCST_LOAD_SENSE(scst_sense_invalid_field_in_cdb));
                goto err;
        }

        switch (srp_cmd->task_attr) {
        case SRP_CMD_HEAD_OF_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_HEAD_OF_QUEUE;
                break;
        case SRP_CMD_ORDERED_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
                break;
        case SRP_CMD_SIMPLE_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_SIMPLE;
                break;
        case SRP_CMD_ACA:
                scmnd->queue_type = SCST_CMD_QUEUE_ACA;
                break;
        default:
                scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
                break;
        }

        scst_cmd_set_tag(scmnd, srp_cmd->tag);
        scst_cmd_set_tgt_priv(scmnd, ioctx);
        scst_cmd_set_expected(scmnd, dir, data_len);
        scst_cmd_init_done(scmnd, scst_estimate_context());

        return 0;

err:
        return -1;
}

/*
 * srpt_handle_tsk_mgmt() - Process an SRP_TSK_MGMT information unit.
 *
 * Returns SRP_TSK_MGMT_SUCCESS upon success.
 *
 * Each task management function is performed by calling one of the
 * scst_rx_mgmt_fn*() functions. These functions will either report failure
 * or process the task management function asynchronously. The function
 * srpt_tsk_mgmt_done() will be called by the SCST core upon completion of the
 * task management function. When srpt_handle_tsk_mgmt() reports failure
 * (i.e. returns a value other than SRP_TSK_MGMT_SUCCESS), a response will
 * have been built in ioctx->buf. This information unit has to be sent back
 * by the caller.
 *
 * For more information about SRP_TSK_MGMT information units, see also section
 * 6.7 in the T10 SRP r16a document.
 */
1366 static u8 srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
1367                                struct srpt_ioctx *ioctx)
1368 {
1369         struct srp_tsk_mgmt *srp_tsk;
1370         struct srpt_mgmt_ioctx *mgmt_ioctx;
1371         int ret;
1372         u8 srp_tsk_mgmt_status;
1373
1374         srp_tsk = ioctx->buf;
1375
1376         TRACE_DBG("recv_tsk_mgmt= %d for task_tag= %lld"
1377                   " using tag= %lld cm_id= %p sess= %p",
1378                   srp_tsk->tsk_mgmt_func,
1379                   (unsigned long long) srp_tsk->task_tag,
1380                   (unsigned long long) srp_tsk->tag,
1381                   ch->cm_id, ch->scst_sess);
1382
1383         srp_tsk_mgmt_status = SRP_TSK_MGMT_FAILED;
1384         mgmt_ioctx = kmalloc(sizeof *mgmt_ioctx, GFP_ATOMIC);
1385         if (!mgmt_ioctx)
1386                 goto err;
1387
1388         mgmt_ioctx->ioctx = ioctx;
1389         mgmt_ioctx->ch = ch;
1390         mgmt_ioctx->tag = srp_tsk->tag;
1391
1392         switch (srp_tsk->tsk_mgmt_func) {
1393         case SRP_TSK_ABORT_TASK:
1394                 TRACE_DBG("%s", "Processing SRP_TSK_ABORT_TASK");
1395                 ret = scst_rx_mgmt_fn_tag(ch->scst_sess,
1396                                           SCST_ABORT_TASK,
1397                                           srp_tsk->task_tag,
1398                                           thread ?
1399                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1400                                           mgmt_ioctx);
1401                 break;
1402         case SRP_TSK_ABORT_TASK_SET:
1403                 TRACE_DBG("%s", "Processing SRP_TSK_ABORT_TASK_SET");
1404                 ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
1405                                           SCST_ABORT_TASK_SET,
1406                                           (u8 *) &srp_tsk->lun,
1407                                           sizeof srp_tsk->lun,
1408                                           thread ?
1409                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1410                                           mgmt_ioctx);
1411                 break;
1412         case SRP_TSK_CLEAR_TASK_SET:
1413                 TRACE_DBG("%s", "Processing SRP_TSK_CLEAR_TASK_SET");
1414                 ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
1415                                           SCST_CLEAR_TASK_SET,
1416                                           (u8 *) &srp_tsk->lun,
1417                                           sizeof srp_tsk->lun,
1418                                           thread ?
1419                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1420                                           mgmt_ioctx);
1421                 break;
1422         case SRP_TSK_LUN_RESET:
1423                 TRACE_DBG("%s", "Processing SRP_TSK_LUN_RESET");
1424                 ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
1425                                           SCST_LUN_RESET,
1426                                           (u8 *) &srp_tsk->lun,
1427                                           sizeof srp_tsk->lun,
1428                                           thread ?
1429                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1430                                           mgmt_ioctx);
1431                 break;
1432         case SRP_TSK_CLEAR_ACA:
1433                 TRACE_DBG("%s", "Processing SRP_TSK_CLEAR_ACA");
1434                 ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
1435                                           SCST_CLEAR_ACA,
1436                                           (u8 *) &srp_tsk->lun,
1437                                           sizeof srp_tsk->lun,
1438                                           thread ?
1439                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1440                                           mgmt_ioctx);
1441                 break;
1442         default:
1443                 TRACE_DBG("%s", "Unsupported task management function.");
1444                 srp_tsk_mgmt_status = SRP_TSK_MGMT_FUNC_NOT_SUPP;
1445                 goto err;
1446         }
1447
1448         if (ret) {
1449                 TRACE_DBG("Processing task management function failed"
1450                           " (ret = %d).", ret);
1451                 goto err;
1452         }
1453         return SRP_TSK_MGMT_SUCCESS;
1454
1455 err:
1456         kfree(mgmt_ioctx);
1457         return srp_tsk_mgmt_status;
1458 }
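
/*
 * Dispatch summary for srpt_handle_tsk_mgmt() above (descriptive comment):
 * SRP ABORT TASK is forwarded per tag via scst_rx_mgmt_fn_tag(), while
 * ABORT TASK SET, CLEAR TASK SET, LUN RESET and CLEAR ACA are forwarded per
 * LUN via scst_rx_mgmt_fn_lun(). The 'thread' module parameter determines
 * whether SCST may process the function in non-atomic context
 * (SCST_NON_ATOMIC) or must stay atomic (SCST_ATOMIC).
 */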
1459
1460 /**
1461  * Process a newly received information unit.
1462  * @ch: RDMA channel through which the information unit has been received.
1463  * @ioctx: SRPT I/O context associated with the information unit.
1464  */
1465 static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
1466                                struct srpt_ioctx *ioctx)
1467 {
1468         struct srp_cmd *srp_cmd;
1469         enum rdma_ch_state ch_state;
1470         u8 srp_response_status;
1471         u8 srp_tsk_mgmt_status;
1472         int len;
1473
1474         /*
1475          * A quote from SAM-3, paragraph 4.9.6: "Any command that is not
1476          * relayed to a dependent logical unit shall be terminated with a
1477          * CHECK CONDITION status. The sense key shall be set to ILLEGAL
1478          * REQUEST and the additional sense code shall be set to INVALID
1479          * COMMAND OPERATION CODE. If a task management function cannot be
1480          * relayed to a dependent logical unit, a service response of SERVICE
1481          * DELIVERY OR TARGET FAILURE shall be returned."
1482          */
1483
1484         srp_response_status = SAM_STAT_BUSY;
1485         /* To keep the compiler happy. */
1486         srp_tsk_mgmt_status = -1;
1487
1488         ch_state = atomic_read(&ch->state);
1489         if (ch_state == RDMA_CHANNEL_CONNECTING) {
1490                 list_add_tail(&ioctx->wait_list, &ch->cmd_wait_list);
1491                 return;
1492         } else if (ch_state == RDMA_CHANNEL_DISCONNECTING) {
1493                 srpt_reset_ioctx(ch, ioctx);
1494                 return;
1495         }
1496
1497         WARN_ON(ch_state != RDMA_CHANNEL_LIVE);
1498
1499         ib_dma_sync_single_for_cpu(ch->sport->sdev->device,
1500                                    ioctx->dma, srp_max_message_size,
1501                                    DMA_FROM_DEVICE);
1502
1503         srp_cmd = ioctx->buf;
1504
1505         ioctx->n_rbuf = 0;
1506         ioctx->rbufs = NULL;
1507         ioctx->n_rdma = 0;
1508         ioctx->n_rdma_ius = 0;
1509         ioctx->rdma_ius = NULL;
1510         ioctx->scmnd = NULL;
1511         ioctx->ch = ch;
1512         ioctx->sol_not = srp_cmd->sol_not;
1513         atomic_set(&ioctx->state, SRPT_STATE_NEW);
1514
1515         switch (srp_cmd->opcode) {
1516         case SRP_CMD:
1517                 if (srpt_handle_cmd(ch, ioctx) < 0) {
1518                         if (ioctx->scmnd)
1519                                 srp_response_status =
1520                                         scst_cmd_get_status(ioctx->scmnd);
1521                         goto err;
1522                 }
1523                 break;
1524
1525         case SRP_TSK_MGMT:
1526                 srp_tsk_mgmt_status = srpt_handle_tsk_mgmt(ch, ioctx);
1527                 if (srp_tsk_mgmt_status != SRP_TSK_MGMT_SUCCESS)
1528                         goto err;
1529                 break;
1530
1531         case SRP_CRED_RSP:
1532                 TRACE_DBG("%s", "received SRP_CRED_RSP");
1533                 srpt_reset_ioctx(ch, ioctx);
1534                 break;
1535
1536         case SRP_AER_RSP:
1537                 TRACE_DBG("%s", "received SRP_AER_RSP");
1538                 srpt_reset_ioctx(ch, ioctx);
1539                 break;
1540
1541         case SRP_I_LOGOUT:
1542         default:
1543                 goto err;
1544         }
1545
1546         return;
1547
1548 err:
1549         ch_state = atomic_read(&ch->state);
1550         if (ch_state != RDMA_CHANNEL_LIVE) {
1551                 /* Give up if another thread modified the channel state. */
1552                 PRINT_ERROR("%s: channel is in state %d", __func__, ch_state);
1553                 srpt_reset_ioctx(ch, ioctx);
1554         } else {
1555                 s32 req_lim_delta;
1556
1557                 req_lim_delta = srpt_req_lim_delta(ch) + 1;
1558                 if (srp_cmd->opcode == SRP_TSK_MGMT) {
1559                         len = srpt_build_tskmgmt_rsp(ch, ioctx, req_lim_delta,
1560                                      srp_tsk_mgmt_status,
1561                                      ((struct srp_tsk_mgmt *)srp_cmd)->tag);
1562                 } else if (ioctx->scmnd) {
1563                         len = srpt_build_cmd_rsp(ch, ioctx, req_lim_delta,
1564                                 srp_cmd->tag, srp_response_status,
1565                                 scst_cmd_get_sense_buffer(ioctx->scmnd),
1566                                 scst_cmd_get_sense_buffer_len(ioctx->scmnd));
1567                 } else {
1568                         len = srpt_build_cmd_rsp(ch, ioctx, req_lim_delta,
1569                                                  srp_cmd->tag,
1570                                                  srp_response_status,
1571                                                  NULL, 0);
1572                 }
1573                 if (srpt_post_send(ch, ioctx, len)) {
1574                         PRINT_ERROR("%s", "Sending SRP_RSP response failed.");
1575                         atomic_sub(req_lim_delta, &ch->last_response_req_lim);
1576                         srpt_reset_ioctx(ch, ioctx);
1577                 }
1578         }
1579 }
1580
1581 /*
1582  * Returns true if the ioctx list is non-empty or if the ib_srpt kernel thread
1583  * should stop.
1584  * @pre thread != 0
1585  */
1586 static inline int srpt_test_ioctx_list(void)
1587 {
1588         int res = (!list_empty(&srpt_thread.thread_ioctx_list) ||
1589                    unlikely(kthread_should_stop()));
1590         return res;
1591 }
1592
1593 /*
1594  * Add 'ioctx' to the tail of the ioctx list and wake up the kernel thread.
1595  *
1596  * @pre thread != 0
1597  */
1598 static inline void srpt_schedule_thread(struct srpt_ioctx *ioctx)
1599 {
1600         unsigned long flags;
1601
1602         spin_lock_irqsave(&srpt_thread.thread_lock, flags);
1603         list_add_tail(&ioctx->comp_list, &srpt_thread.thread_ioctx_list);
1604         spin_unlock_irqrestore(&srpt_thread.thread_lock, flags);
1605         wake_up(&ioctx_list_waitQ);
1606 }
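
/*
 * Deferred-processing flow when the 'thread' module parameter is nonzero
 * (descriptive comment): the completion handler calls srpt_schedule_thread()
 * to queue an I/O context under srpt_thread.thread_lock and wake up
 * ioctx_list_waitQ; srpt_ioctx_thread() dequeues the context and dispatches
 * on ioctx->op. The woken thread re-tests srpt_test_ioctx_list() under the
 * lock, so a wakeup that races with queue insertion is not lost.
 */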
1607
1608 /**
1609  * InfiniBand completion queue callback function.
1610  * @cq: completion queue.
1611  * @ctx: completion queue context, which was passed as the fourth argument of
1612  *       the function ib_create_cq().
1613  */
1614 static void srpt_completion(struct ib_cq *cq, void *ctx)
1615 {
1616         struct srpt_rdma_ch *ch = ctx;
1617         struct srpt_device *sdev = ch->sport->sdev;
1618         struct ib_wc wc;
1619         struct srpt_ioctx *ioctx;
1620
1621         ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
1622         while (ib_poll_cq(ch->cq, 1, &wc) > 0) {
1623                 if (wc.status) {
1624                         PRINT_INFO("%s failed with status %d",
1625                                    wc.wr_id & SRPT_OP_RECV
1626                                    ? "receiving"
1627                                    : "sending response",
1628                                    wc.status);
1629                         srpt_handle_err_comp(ch, &wc);
1630                         continue;
1631                 }
1632
1633                 if (wc.wr_id & SRPT_OP_RECV) {
1634                         int req_lim;
1635
1636                         req_lim = atomic_dec_return(&ch->req_lim);
1637                         if (req_lim < 0)
1638                                 PRINT_ERROR("internal error: req_lim = %d < 0",
1639                                             req_lim);
1640                         ioctx = sdev->ioctx_ring[wc.wr_id & ~SRPT_OP_RECV];
1641                         if (thread) {
1642                                 ioctx->ch = ch;
1643                                 ioctx->op = IB_WC_RECV;
1644                                 srpt_schedule_thread(ioctx);
1645                         } else
1646                                 srpt_handle_new_iu(ch, ioctx);
1647                 } else {
1648                         ioctx = sdev->ioctx_ring[wc.wr_id];
1649                         if (wc.opcode == IB_WC_SEND)
1650                                 atomic_inc(&ch->qp_wr_avail);
1651                         else {
1652                                 WARN_ON(wc.opcode != IB_WC_RDMA_READ);
1653                                 WARN_ON(ioctx->n_rdma <= 0);
1654                                 atomic_add(ioctx->n_rdma,
1655                                            &ch->qp_wr_avail);
1656                         }
1657                         if (thread) {
1658                                 ioctx->ch = ch;
1659                                 ioctx->op = wc.opcode;
1660                                 srpt_schedule_thread(ioctx);
1661                         } else {
1662                                 switch (wc.opcode) {
1663                                 case IB_WC_SEND:
1664                                         srpt_handle_send_comp(ch, ioctx,
1665                                                 scst_estimate_context());
1666                                         break;
1667                                 case IB_WC_RDMA_WRITE:
1668                                 case IB_WC_RDMA_READ:
1669                                         srpt_handle_rdma_comp(ch, ioctx);
1670                                         break;
1671                                 default:
1672                                         PRINT_ERROR("received unrecognized"
1673                                                     " IB WC opcode %d",
1674                                                     wc.opcode);
1675                                         break;
1676                                 }
1677                         }
1678                 }
1679
1680 #if defined(CONFIG_SCST_DEBUG)
1681                 if (interrupt_processing_delay_in_us <= MAX_UDELAY_MS * 1000)
1682                         udelay(interrupt_processing_delay_in_us);
1683 #endif
1684         }
1685 }
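
/*
 * Note on srpt_completion() above (a sketch of the usual verbs pattern, not
 * a provider-specific guarantee): the CQ is re-armed with ib_req_notify_cq()
 * before it is drained with ib_poll_cq(). A completion that arrives while
 * the polling loop runs is therefore either picked up by the loop itself or
 * triggers a fresh callback, so completions are not silently missed.
 */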
1686
1687 /*
1688  * Create a completion queue on the specified device.
1689  */
1690 static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
1691 {
1692         struct ib_qp_init_attr *qp_init;
1693         struct srpt_device *sdev = ch->sport->sdev;
1694         int cqe;
1695         int ret;
1696
1697         qp_init = kzalloc(sizeof *qp_init, GFP_KERNEL);
1698         if (!qp_init)
1699                 return -ENOMEM;
1700
1701         /* Create a completion queue (CQ). */
1702
1703         cqe = SRPT_RQ_SIZE + SRPT_SQ_SIZE - 1;
1704 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && !defined(RHEL_RELEASE_CODE)
1705         ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe);
1706 #else
1707         ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe, 0);
1708 #endif
1709         if (IS_ERR(ch->cq)) {
1710                 ret = PTR_ERR(ch->cq);
1711                 PRINT_ERROR("failed to create_cq cqe= %d ret= %d", cqe, ret);
1712                 goto out;
1713         }
1714
1715         /* Request completion notification. */
1716
1717         ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
1718
1719         /* Create a queue pair (QP). */
1720
1721         qp_init->qp_context = (void *)ch;
1722         qp_init->event_handler
1723                 = (void (*)(struct ib_event *, void *))srpt_qp_event;
1724         qp_init->send_cq = ch->cq;
1725         qp_init->recv_cq = ch->cq;
1726         qp_init->srq = sdev->srq;
1727         qp_init->sq_sig_type = IB_SIGNAL_REQ_WR;
1728         qp_init->qp_type = IB_QPT_RC;
1729         qp_init->cap.max_send_wr = SRPT_SQ_SIZE;
1730         qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE;
1731
1732         ch->qp = ib_create_qp(sdev->pd, qp_init);
1733         if (IS_ERR(ch->qp)) {
1734                 ret = PTR_ERR(ch->qp);
1735                 ib_destroy_cq(ch->cq);
1736                 PRINT_ERROR("failed to create_qp ret= %d", ret);
1737                 goto out;
1738         }
1739
1740         atomic_set(&ch->qp_wr_avail, qp_init->cap.max_send_wr);
1741
1742         TRACE_DBG("%s: max_cqe= %d max_sge= %d cm_id= %p",
1743                __func__, ch->cq->cqe, qp_init->cap.max_send_sge,
1744                ch->cm_id);
1745
1746         /* Modify the attributes and the state of queue pair ch->qp. */
1747
1748         ret = srpt_init_ch_qp(ch, ch->qp);
1749         if (ret) {
1750                 ib_destroy_qp(ch->qp);
1751                 ib_destroy_cq(ch->cq);
1752                 goto out;
1753         }
1754
1755 out:
1756         kfree(qp_init);
1757         return ret;
1758 }
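
/*
 * Sizing note (an assumption, not stated in the original sources): the CQ is
 * created with SRPT_RQ_SIZE + SRPT_SQ_SIZE - 1 entries, presumably so that
 * one CQ can hold completions for the shared receive queue and the send
 * queue even when both are completely full.
 */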
1759
1760 /**
1761  * Release the channel corresponding to the specified cm_id.
1762  *
1763  * Note: must be called from inside srpt_cm_handler to avoid a race between
1764  * accessing sdev->spinlock and the call to kfree(sdev) in srpt_remove_one()
1765  * (the caller of srpt_cm_handler holds the cm_id spinlock;
1766  * srpt_remove_one() waits until all SCST sessions for the associated
1767  * IB device have been unregistered and SCST session unregistration involves
1768  * a call to ib_destroy_cm_id(), which locks the cm_id spinlock and hence
1769  * waits until this function has finished).
1770  */
1771 static void srpt_release_channel_by_cmid(struct ib_cm_id *cm_id)
1772 {
1773         struct srpt_device *sdev;
1774         struct srpt_rdma_ch *ch;
1775         bool found;
1776
1777         TRACE_ENTRY();
1778
1779         sdev = cm_id->context;
1780         BUG_ON(!sdev);
1781         found = false;
1782         spin_lock_irq(&sdev->spinlock);
1783         list_for_each_entry(ch, &sdev->rch_list, list) {
1784                 if (ch->cm_id == cm_id) {
1785                         list_del(&ch->list);
1786                         atomic_set(&ch->state, RDMA_CHANNEL_DISCONNECTING);
1787                         found = true;
1788                         break;
1789                 }
1790         }
1791         spin_unlock_irq(&sdev->spinlock);
1792
1793         if (found)
1794                 scst_unregister_session(ch->scst_sess, 0, srpt_release_channel);
1795
1796         TRACE_EXIT();
1797 }
1798
1799 /**
1800  * Look up the RDMA channel that corresponds to the specified cm_id.
1801  *
1802  * Return NULL if no matching RDMA channel has been found.
1803  */
1804 static struct srpt_rdma_ch *srpt_find_channel(struct srpt_device *sdev,
1805                                               struct ib_cm_id *cm_id)
1806 {
1807         struct srpt_rdma_ch *ch;
1808         bool found;
1809
1810         BUG_ON(!sdev);
1811         found = false;
1812         spin_lock_irq(&sdev->spinlock);
1813         list_for_each_entry(ch, &sdev->rch_list, list) {
1814                 if (ch->cm_id == cm_id) {
1815                         found = true;
1816                         break;
1817                 }
1818         }
1819         spin_unlock_irq(&sdev->spinlock);
1820
1821         return found ? ch : NULL;
1822 }
1823
1824 /**
1825  * Release all resources associated with an RDMA channel.
1826  *
1827  * Notes:
1828  * - The caller must have removed the channel from the channel list before
1829  *   calling this function.
1830  * - Must be called as a callback function via scst_unregister_session(). Never
1831  *   call this function directly because doing so would trigger several race
1832  *   conditions.
1833  * - Do not access ch->sport or ch->sport->sdev in this function because the
1834  *   memory that was allocated for the sport and/or sdev data structures may
1835  *   already have been freed at the time this function is called.
1836  */
1837 static void srpt_release_channel(struct scst_session *scst_sess)
1838 {
1839         struct srpt_rdma_ch *ch;
1840
1841         TRACE_ENTRY();
1842
1843         ch = scst_sess_get_tgt_priv(scst_sess);
1844         BUG_ON(!ch);
1845         WARN_ON(atomic_read(&ch->state) != RDMA_CHANNEL_DISCONNECTING);
1846
1847         TRACE_DBG("destroying cm_id %p", ch->cm_id);
1848         BUG_ON(!ch->cm_id);
1849         ib_destroy_cm_id(ch->cm_id);
1850
1851         ib_destroy_qp(ch->qp);
1852         ib_destroy_cq(ch->cq);
1853         kfree(ch);
1854
1855         TRACE_EXIT();
1856 }
1857
1858 /**
1859  * Process the event IB_CM_REQ_RECEIVED.
1860  *
1861  * Ownership of the cm_id is transferred to the SCST session if this function
1862  * returns zero. Otherwise the caller remains the owner of the cm_id.
1863  */
1864 static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
1865                             struct ib_cm_req_event_param *param,
1866                             void *private_data)
1867 {
1868         struct srpt_device *sdev = cm_id->context;
1869         struct srp_login_req *req;
1870         struct srp_login_rsp *rsp;
1871         struct srp_login_rej *rej;
1872         struct ib_cm_rep_param *rep_param;
1873         struct srpt_rdma_ch *ch, *tmp_ch;
1874         u32 it_iu_len;
1875         int ret = 0;
1876
1877 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
1878         WARN_ON(!sdev || !private_data);
1879         if (!sdev || !private_data)
1880                 return -EINVAL;
1881 #else
1882         if (WARN_ON(!sdev || !private_data))
1883                 return -EINVAL;
1884 #endif
1885
1886         req = (struct srp_login_req *)private_data;
1887
1888         it_iu_len = be32_to_cpu(req->req_it_iu_len);
1889
1890         PRINT_INFO("Received SRP_LOGIN_REQ with"
1891             " i_port_id 0x%llx:0x%llx, t_port_id 0x%llx:0x%llx and it_iu_len %d"
1892             " on port %d (guid=0x%llx:0x%llx)",
1893             (unsigned long long)be64_to_cpu(*(u64 *)&req->initiator_port_id[0]),
1894             (unsigned long long)be64_to_cpu(*(u64 *)&req->initiator_port_id[8]),
1895             (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[0]),
1896             (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[8]),
1897             it_iu_len,
1898             param->port,
1899             (unsigned long long)be64_to_cpu(*(u64 *)
1900                                 &sdev->port[param->port - 1].gid.raw[0]),
1901             (unsigned long long)be64_to_cpu(*(u64 *)
1902                                 &sdev->port[param->port - 1].gid.raw[8]));
1903
1904         rsp = kzalloc(sizeof *rsp, GFP_KERNEL);
1905         rej = kzalloc(sizeof *rej, GFP_KERNEL);
1906         rep_param = kzalloc(sizeof *rep_param, GFP_KERNEL);
1907
1908         if (!rsp || !rej || !rep_param) {
1909                 ret = -ENOMEM;
1910                 goto out;
1911         }
1912
1913         if (it_iu_len > srp_max_message_size || it_iu_len < 64) {
1914                 rej->reason =
1915                     cpu_to_be32(SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
1916                 ret = -EINVAL;
1917                 PRINT_ERROR("rejected SRP_LOGIN_REQ because its"
1918                             " length (%d bytes) is out of range (%d .. %d)",
1919                             it_iu_len, 64, srp_max_message_size);
1920                 goto reject;
1921         }
1922
1923         if ((req->req_flags & SRP_MTCH_ACTION) == SRP_MULTICHAN_SINGLE) {
1924                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;
1925
1926                 spin_lock_irq(&sdev->spinlock);
1927
1928                 list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
1929                         if (!memcmp(ch->i_port_id, req->initiator_port_id, 16)
1930                             && !memcmp(ch->t_port_id, req->target_port_id, 16)
1931                             && param->port == ch->sport->port
1932                             && param->listen_id == ch->sport->sdev->cm_id
1933                             && ch->cm_id) {
1934                                 enum rdma_ch_state prev_state;
1935
1936                                 /* found an existing channel */
1937                                 TRACE_DBG("Found existing channel name= %s"
1938                                           " cm_id= %p state= %d",
1939                                           ch->sess_name, ch->cm_id,
1940                                           atomic_read(&ch->state));
1941
1942                                 prev_state = atomic_xchg(&ch->state,
1943                                                 RDMA_CHANNEL_DISCONNECTING);
1944                                 if (prev_state == RDMA_CHANNEL_CONNECTING)
1945                                         list_del(&ch->list);
1946
1947                                 spin_unlock_irq(&sdev->spinlock);
1948
1949                                 rsp->rsp_flags =
1950                                         SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
1951
1952                                 if (prev_state == RDMA_CHANNEL_LIVE) {
1953                                         ib_send_cm_dreq(ch->cm_id, NULL, 0);
1954                                         PRINT_INFO("disconnected"
1955                                           " session %s because a new"
1956                                           " SRP_LOGIN_REQ has been received.",
1957                                           ch->sess_name);
1958                                 } else if (prev_state ==
1959                                          RDMA_CHANNEL_CONNECTING) {
1960                                         PRINT_ERROR("%s", "rejected"
1961                                           " SRP_LOGIN_REQ because another login"
1962                                           " request is being processed.");
1963                                         ib_send_cm_rej(ch->cm_id,
1964                                                        IB_CM_REJ_NO_RESOURCES,
1965                                                        NULL, 0, NULL, 0);
1966                                         scst_unregister_session(ch->scst_sess,
1967                                                         0,
1968                                                         srpt_release_channel);
1969                                 }
1970
1971                                 spin_lock_irq(&sdev->spinlock);
1972                         }
1973                 }
1974
1975                 spin_unlock_irq(&sdev->spinlock);
1976
1977         } else
1978                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
1979
1980         if (((u64) (*(u64 *) req->target_port_id) !=
1981              cpu_to_be64(srpt_service_guid)) ||
1982             ((u64) (*(u64 *) (req->target_port_id + 8)) !=
1983              cpu_to_be64(srpt_service_guid))) {
1984                 rej->reason =
1985                     cpu_to_be32(SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
1986                 ret = -EINVAL;
1987                 PRINT_ERROR("%s", "rejected SRP_LOGIN_REQ because it"
1988                        " has an invalid target port identifier.");
1989                 goto reject;
1990         }
1991
1992         ch = kzalloc(sizeof *ch, GFP_KERNEL);
1993         if (!ch) {
1994                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1995                 PRINT_ERROR("%s",
1996                             "rejected SRP_LOGIN_REQ because out of memory.");
1997                 ret = -ENOMEM;
1998                 goto reject;
1999         }
2000
2001         memcpy(ch->i_port_id, req->initiator_port_id, 16);
2002         memcpy(ch->t_port_id, req->target_port_id, 16);
2003         ch->sport = &sdev->port[param->port - 1];
2004         ch->losolnt = req->req_flags & SRP_LOSOLNT ? 1 : 0;
2005         ch->crsolnt = req->req_flags & SRP_CRSOLNT ? 1 : 0;
2006         ch->aesolnt = req->req_flags & SRP_AESOLNT ? 1 : 0;
2007         ch->cm_id = cm_id;
2008         atomic_set(&ch->state, RDMA_CHANNEL_CONNECTING);
2009         INIT_LIST_HEAD(&ch->cmd_wait_list);
2010
2011         ret = srpt_create_ch_ib(ch);
2012         if (ret) {
2013                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2014                 PRINT_ERROR("%s", "rejected SRP_LOGIN_REQ because creating"
2015                             " a new RDMA channel failed.");
2016                 goto free_ch;
2017         }
2018
2019         ret = srpt_ch_qp_rtr(ch, ch->qp);
2020         if (ret) {
2021                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2022                 PRINT_ERROR("rejected SRP_LOGIN_REQ because enabling"
2023                        " RTR failed (error code = %d)", ret);
2024                 goto destroy_ib;
2025         }
2026
2027         if (use_port_guid_in_session_name) {
2028                 /*
2029                  * If the kernel module parameter use_port_guid_in_session_name
2030                  * has been specified, use a combination of the target port
2031                  * GUID and the initiator port ID as the session name. This
2032                  * was the original behavior of the SRP target implementation
2033                  * (i.e. before the SRPT was included in OFED 1.3).
2034                  */
2035                 snprintf(ch->sess_name, sizeof(ch->sess_name),
2036                          "0x%016llx%016llx",
2037                          (unsigned long long)be64_to_cpu(*(u64 *)
2038                                 &sdev->port[param->port - 1].gid.raw[8]),
2039                          (unsigned long long)be64_to_cpu(*(u64 *)
2040                                 (ch->i_port_id + 8)));
2041         } else {
2042                 /*
2043                  * Default behavior: use the initiator port identifier as the
2044                  * session name.
2045                  */
2046                 snprintf(ch->sess_name, sizeof(ch->sess_name),
2047                          "0x%016llx%016llx",
2048                          (unsigned long long)be64_to_cpu(*(u64 *)ch->i_port_id),
2049                          (unsigned long long)be64_to_cpu(*(u64 *)
2050                                  (ch->i_port_id + 8)));
2051         }
2052
2053         TRACE_DBG("registering session %s", ch->sess_name);
2054
2055         BUG_ON(!sdev->scst_tgt);
2056         ch->scst_sess = scst_register_session(sdev->scst_tgt, 0, ch->sess_name,
2057                                               NULL, NULL);
2058         if (!ch->scst_sess) {
2059                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2060                 TRACE_DBG("%s", "Failed to create scst sess");
2061                 goto destroy_ib;
2062         }
2063
2064         TRACE_DBG("Establish connection sess=%p name=%s cm_id=%p",
2065                   ch->scst_sess, ch->sess_name, ch->cm_id);
2066
2067         scst_sess_set_tgt_priv(ch->scst_sess, ch);
2068
2069         /* create srp_login_response */
2070         rsp->opcode = SRP_LOGIN_RSP;
2071         rsp->tag = req->tag;
2072         rsp->max_it_iu_len = req->req_it_iu_len;
2073         rsp->max_ti_iu_len = req->req_it_iu_len;
2074         ch->max_ti_iu_len = req->req_it_iu_len;
2075         rsp->buf_fmt =
2076             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
2077         rsp->req_lim_delta = cpu_to_be32(SRPT_RQ_SIZE);
2078         atomic_set(&ch->req_lim, SRPT_RQ_SIZE);
2079         atomic_set(&ch->last_response_req_lim, SRPT_RQ_SIZE);
2080
2081         /* create cm reply */
2082         rep_param->qp_num = ch->qp->qp_num;
2083         rep_param->private_data = (void *)rsp;
2084         rep_param->private_data_len = sizeof *rsp;
2085         rep_param->rnr_retry_count = 7;
2086         rep_param->flow_control = 1;
2087         rep_param->failover_accepted = 0;
2088         rep_param->srq = 1;
2089         rep_param->responder_resources = 4;
2090         rep_param->initiator_depth = 4;
2091
2092         ret = ib_send_cm_rep(cm_id, rep_param);
2093         if (ret) {
2094                 PRINT_ERROR("sending SRP_LOGIN_REQ response failed"
2095                             " (error code = %d)", ret);
2096                 goto release_channel;
2097         }
2098
2099         spin_lock_irq(&sdev->spinlock);
2100         list_add_tail(&ch->list, &sdev->rch_list);
2101         spin_unlock_irq(&sdev->spinlock);
2102
2103         goto out;
2104
2105 release_channel:
2106         atomic_set(&ch->state, RDMA_CHANNEL_DISCONNECTING);
2107         scst_unregister_session(ch->scst_sess, 0, NULL);
2108         ch->scst_sess = NULL;
2109
2110 destroy_ib:
2111         ib_destroy_qp(ch->qp);
2112         ib_destroy_cq(ch->cq);
2113
2114 free_ch:
2115         kfree(ch);
2116
2117 reject:
2118         rej->opcode = SRP_LOGIN_REJ;
2119         rej->tag = req->tag;
2120         rej->buf_fmt =
2121             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
2122
2123         ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
2124                              (void *)rej, sizeof *rej);
2125
2126 out:
2127         kfree(rep_param);
2128         kfree(rsp);
2129         kfree(rej);
2130
2131         return ret;
2132 }
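
/*
 * Error-unwinding summary for srpt_cm_req_recv() (descriptive comment): the
 * cleanup labels are ordered so that each failure path falls through to all
 * the cleanup it needs: release_channel unregisters the SCST session,
 * destroy_ib tears down the QP and CQ, free_ch frees the channel, reject
 * sends an SRP_LOGIN_REJ, and the common out label frees rsp, rej and
 * rep_param.
 */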
2133
2134 static void srpt_cm_rej_recv(struct ib_cm_id *cm_id)
2135 {
2136         PRINT_INFO("Received InfiniBand REJ packet for cm_id %p.", cm_id);
2137         srpt_release_channel_by_cmid(cm_id);
2138 }
2139
2140 /**
2141  * Process an IB_CM_RTU_RECEIVED or IB_CM_USER_ESTABLISHED event.
2142  *
2143  * An IB_CM_RTU_RECEIVED message indicates that the connection is established
2144  * and that the recipient may begin transmitting (RTU = ready to use).
2145  */
2146 static void srpt_cm_rtu_recv(struct ib_cm_id *cm_id)
2147 {
2148         struct srpt_rdma_ch *ch;
2149         int ret;
2150
2151         ch = srpt_find_channel(cm_id->context, cm_id);
2152         WARN_ON(!ch);
2153         if (!ch)
2154                 goto out;
2155
2156         if (srpt_test_and_set_channel_state(ch, RDMA_CHANNEL_CONNECTING,
2157                         RDMA_CHANNEL_LIVE) == RDMA_CHANNEL_CONNECTING) {
2158                 struct srpt_ioctx *ioctx, *ioctx_tmp;
2159
2160                 ret = srpt_ch_qp_rts(ch, ch->qp);
2161
2162                 list_for_each_entry_safe(ioctx, ioctx_tmp, &ch->cmd_wait_list,
2163                                          wait_list) {
2164                         list_del(&ioctx->wait_list);
2165                         srpt_handle_new_iu(ch, ioctx);
2166                 }
2167                 if (ret && srpt_test_and_set_channel_state(ch,
2168                         RDMA_CHANNEL_LIVE,
2169                         RDMA_CHANNEL_DISCONNECTING) == RDMA_CHANNEL_LIVE) {
2170                         TRACE_DBG("cm_id=%p sess_name=%s state=%d",
2171                                   cm_id, ch->sess_name,
2172                                   atomic_read(&ch->state));
2173                         ib_send_cm_dreq(ch->cm_id, NULL, 0);
2174                 }
2175         }
2176
2177 out:
2178         ;
2179 }
2180
2181 static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
2182 {
2183         PRINT_INFO("Received InfiniBand TimeWait exit for cm_id %p.", cm_id);
2184         srpt_release_channel_by_cmid(cm_id);
2185 }
2186
2187 static void srpt_cm_rep_error(struct ib_cm_id *cm_id)
2188 {
2189         PRINT_INFO("Received InfiniBand REP error for cm_id %p.", cm_id);
2190         srpt_release_channel_by_cmid(cm_id);
2191 }
2192
2193 static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
2194 {
2195         struct srpt_rdma_ch *ch;
2196
2197         ch = srpt_find_channel(cm_id->context, cm_id);
2198         WARN_ON(!ch);
2199         if (!ch)
2200                 goto out;
2201
2202         TRACE_DBG("cm_id= %p ch->state= %d", cm_id, atomic_read(&ch->state));
2203
2204         switch (atomic_read(&ch->state)) {
2205         case RDMA_CHANNEL_LIVE:
2206         case RDMA_CHANNEL_CONNECTING:
2207                 ib_send_cm_drep(ch->cm_id, NULL, 0);
2208                 PRINT_INFO("Received DREQ and sent DREP for session %s.",
2209                            ch->sess_name);
2210                 break;
2211         case RDMA_CHANNEL_DISCONNECTING:
2212         default:
2213                 break;
2214         }
2215
2216 out:
2217         ;
2218 }
2219
2220 static void srpt_cm_drep_recv(struct ib_cm_id *cm_id)
2221 {
2222         PRINT_INFO("Received InfiniBand DREP message for cm_id %p.", cm_id);
2223         srpt_release_channel_by_cmid(cm_id);
2224 }
2225
2226 /**
2227  * IB connection manager callback function.
2228  *
2229  * A non-zero return value will cause the caller to destroy the CM ID.
2230  *
2231  * Note: srpt_cm_handler() must only return a non-zero value when transferring
2232  * ownership of the cm_id to a channel by srpt_cm_req_recv() failed. Returning
2233  * a non-zero value in any other case will trigger a race with the
2234  * ib_destroy_cm_id() call in srpt_release_channel().
2235  */
2236 static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2237 {
2238         int ret;
2239
2240         ret = 0;
2241         switch (event->event) {
2242         case IB_CM_REQ_RECEIVED:
2243                 ret = srpt_cm_req_recv(cm_id, &event->param.req_rcvd,
2244                                        event->private_data);
2245                 break;
2246         case IB_CM_REJ_RECEIVED:
2247                 srpt_cm_rej_recv(cm_id);
2248                 break;
2249         case IB_CM_RTU_RECEIVED:
2250         case IB_CM_USER_ESTABLISHED:
2251                 srpt_cm_rtu_recv(cm_id);
2252                 break;
2253         case IB_CM_DREQ_RECEIVED:
2254                 srpt_cm_dreq_recv(cm_id);
2255                 break;
2256         case IB_CM_DREP_RECEIVED:
2257                 srpt_cm_drep_recv(cm_id);
2258                 break;
2259         case IB_CM_TIMEWAIT_EXIT:
2260                 srpt_cm_timewait_exit(cm_id);
2261                 break;
2262         case IB_CM_REP_ERROR:
2263                 srpt_cm_rep_error(cm_id);
2264                 break;
2265         default:
2266                 PRINT_ERROR("received unrecognized IB CM event %d",
2267                             event->event);
2268                 break;
2269         }
2270
2271         return ret;
2272 }
2273
2274 static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
2275                                  struct srpt_ioctx *ioctx,
2276                                  struct scst_cmd *scmnd)
2277 {
2278         struct scatterlist *scat;
2279         scst_data_direction dir;
2280         struct rdma_iu *riu;
2281         struct srp_direct_buf *db;
2282         dma_addr_t dma_addr;
2283         struct ib_sge *sge;
2284         u64 raddr;
2285         u32 rsize;
2286         u32 tsize;
2287         u32 dma_len;
2288         int count, nrdma;
2289         int i, j, k;
2290
2291         scat = scst_cmd_get_sg(scmnd);
2292         dir = scst_cmd_get_data_direction(scmnd);
2293         WARN_ON(scat == NULL);
2294         count = ib_dma_map_sg(ch->sport->sdev->device, scat,
2295                               scst_cmd_get_sg_cnt(scmnd),
2296                               scst_to_tgt_dma_dir(dir));
2297         if (unlikely(!count))
2298                 return -EBUSY;
2299
2300         if (ioctx->rdma_ius && ioctx->n_rdma_ius)
2301                 nrdma = ioctx->n_rdma_ius;
2302         else {
2303                 nrdma = count / SRPT_DEF_SG_PER_WQE + ioctx->n_rbuf;
2304
2305                 ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu,
2306                                           scst_cmd_atomic(scmnd)
2307                                           ? GFP_ATOMIC : GFP_KERNEL);
2308                 if (!ioctx->rdma_ius) {
2309                         WARN_ON(scat == NULL);
2310                         ib_dma_unmap_sg(ch->sport->sdev->device,
2311                                         scat, scst_cmd_get_sg_cnt(scmnd),
2312                                         scst_to_tgt_dma_dir(dir));
2313                         return -ENOMEM;
2314                 }
2315
2316                 ioctx->n_rdma_ius = nrdma;
2317         }
2318
2319         db = ioctx->rbufs;
2320         tsize = (dir == SCST_DATA_READ) ?
2321                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
2322         dma_len = sg_dma_len(&scat[0]);
2323         riu = ioctx->rdma_ius;
2324
2325         /*
2326          * For each remote descriptor, compute the number of ib_sge entries
2327          * needed. When at most SRPT_DEF_SG_PER_WQE ib_sge entries are needed
2328          * per RDMA operation, one rdma_iu (one RDMA work request) per remote
2329          * descriptor suffices; otherwise additional rdma_iu structures are
2330          * allocated so the remaining ib_sge entries can be carried by further
2331          * RDMA work requests.
2332          */
2333         for (i = 0, j = 0;
2334              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
2335                 rsize = be32_to_cpu(db->len);
2336                 raddr = be64_to_cpu(db->va);
2337                 riu->raddr = raddr;
2338                 riu->rkey = be32_to_cpu(db->key);
2339                 riu->sge_cnt = 0;
2340
2341                 /* Calculate how many sge entries this remote buffer requires. */
2342                 while (rsize > 0 && tsize > 0) {
2343
2344                         if (rsize >= dma_len) {
2345                                 tsize -= dma_len;
2346                                 rsize -= dma_len;
2347                                 raddr += dma_len;
2348
2349                                 if (tsize > 0) {
2350                                         ++j;
2351                                         if (j < count)
2352                                                 dma_len = sg_dma_len(&scat[j]);
2353                                 }
2354                         } else {
2355                                 tsize -= rsize;
2356                                 dma_len -= rsize;
2357                                 rsize = 0;
2358                         }
2359
2360                         ++riu->sge_cnt;
2361
2362                         if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) {
2363                                 ++ioctx->n_rdma;
2364                                 riu->sge =
2365                                     kmalloc(riu->sge_cnt * sizeof *riu->sge,
2366                                             scst_cmd_atomic(scmnd)
2367                                             ? GFP_ATOMIC : GFP_KERNEL);
2368                                 if (!riu->sge)
2369                                         goto free_mem;
2370
2371                                 ++riu;
2372                                 riu->sge_cnt = 0;
2373                                 riu->raddr = raddr;
2374                                 riu->rkey = be32_to_cpu(db->key);
2375                         }
2376                 }
2377
2378                 ++ioctx->n_rdma;
2379                 riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge,
2380                                    scst_cmd_atomic(scmnd)
2381                                    ? GFP_ATOMIC : GFP_KERNEL);
2382                 if (!riu->sge)
2383                         goto free_mem;
2384         }
2385
2386         db = ioctx->rbufs;
2387         scat = scst_cmd_get_sg(scmnd);
2388         tsize = (dir == SCST_DATA_READ) ?
2389                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
2390         riu = ioctx->rdma_ius;
2391         dma_len = sg_dma_len(&scat[0]);
2392         dma_addr = sg_dma_address(&scat[0]);
2393
2394         /* This second loop maps the DMA-mapped sg addresses onto rdma_iu->sge. */
2395         for (i = 0, j = 0;
2396              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
2397                 rsize = be32_to_cpu(db->len);
2398                 sge = riu->sge;
2399                 k = 0;
2400
2401                 while (rsize > 0 && tsize > 0) {
2402                         sge->addr = dma_addr;
2403                         sge->lkey = ch->sport->sdev->mr->lkey;
2404
2405                         if (rsize >= dma_len) {
2406                                 sge->length =
2407                                         (tsize < dma_len) ? tsize : dma_len;
2408                                 tsize -= dma_len;
2409                                 rsize -= dma_len;
2410
2411                                 if (tsize > 0) {
2412                                         ++j;
2413                                         if (j < count) {
2414                                                 dma_len = sg_dma_len(&scat[j]);
2415                                                 dma_addr =
2416                                                     sg_dma_address(&scat[j]);
2417                                         }
2418                                 }
2419                         } else {
2420                                 sge->length = (tsize < rsize) ? tsize : rsize;
2421                                 tsize -= rsize;
2422                                 dma_len -= rsize;
2423                                 dma_addr += rsize;
2424                                 rsize = 0;
2425                         }
2426
2427                         ++k;
2428                         if (k == riu->sge_cnt && rsize > 0) {
2429                                 ++riu;
2430                                 sge = riu->sge;
2431                                 k = 0;
2432                         } else if (rsize > 0)
2433                                 ++sge;
2434                 }
2435         }
2436
2437         return 0;
2438
2439 free_mem:
2440         srpt_unmap_sg_to_ib_sge(ch, ioctx);
2441
2442         return -ENOMEM;
2443 }
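
/*
 * Worked example for the rdma_iu sizing in srpt_map_sg_to_ib_sge() above
 * (illustrative numbers only): assuming SRPT_DEF_SG_PER_WQE == 16, a command
 * whose scatterlist maps to count == 40 DMA segments with n_rbuf == 2 remote
 * buffers pre-allocates nrdma = 40 / 16 + 2 == 2 + 2 == 4 rdma_iu slots. The
 * first pass then splits each remote buffer into work requests of at most
 * SRPT_DEF_SG_PER_WQE sge entries and the second pass fills in the ib_sge
 * address/length/lkey triples.
 */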
2444
2445 static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
2446                                     struct srpt_ioctx *ioctx)
2447 {
2448         struct scst_cmd *scmnd;
2449         struct scatterlist *scat;
2450         scst_data_direction dir;
2451
2452         BUG_ON(ioctx->n_rdma && !ioctx->rdma_ius);
2453
2454         while (ioctx->n_rdma)
2455                 kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge);
2456
2457         kfree(ioctx->rdma_ius);
2458         ioctx->rdma_ius = NULL;
2459
2460         scmnd = ioctx->scmnd;
2461         if (scmnd) {
2462                 BUG_ON(ioctx != scst_cmd_get_tgt_priv(scmnd));
2463                 scat = scst_cmd_get_sg(scmnd);
2464                 if (scat) {
2465                         dir = scst_cmd_get_data_direction(scmnd);
2466                         ib_dma_unmap_sg(ch->sport->sdev->device,
2467                                         scat, scst_cmd_get_sg_cnt(scmnd),
2468                                         scst_to_tgt_dma_dir(dir));
2469                 }
2470         }
2471 }
2472
2473 static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
2474                               scst_data_direction dir)
2475 {
2476         struct ib_send_wr wr;
2477         struct ib_send_wr *bad_wr;
2478         struct rdma_iu *riu;
2479         int i;
2480         int ret;
2481         int sq_wr_avail;
2482
2483         if (dir == SCST_DATA_WRITE) {
2484                 ret = -ENOMEM;
2485                 sq_wr_avail = atomic_sub_return(ioctx->n_rdma,
2486                                                  &ch->qp_wr_avail);
2487                 if (sq_wr_avail < 0) {
2488                         atomic_add(ioctx->n_rdma, &ch->qp_wr_avail);
2489                         PRINT_INFO("%s[%d]: send queue full", __func__, __LINE__);
2490                         goto out;
2491                 }
2492         }
2493
2494         ret = 0;
2495         riu = ioctx->rdma_ius;
2496         memset(&wr, 0, sizeof wr);
2497
2498         for (i = 0; i < ioctx->n_rdma; ++i, ++riu) {
2499                 wr.opcode = (dir == SCST_DATA_READ) ?
2500                     IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
2501                 wr.next = NULL;
2502                 wr.wr_id = ioctx->index;
2503                 wr.wr.rdma.remote_addr = riu->raddr;
2504                 wr.wr.rdma.rkey = riu->rkey;
2505                 wr.num_sge = riu->sge_cnt;
2506                 wr.sg_list = riu->sge;
2507
2508                 /* Signal only the last WR of a data-out (RDMA read) sequence. */
2509                 if (i == (ioctx->n_rdma - 1) && dir == SCST_DATA_WRITE)
2510                         wr.send_flags = IB_SEND_SIGNALED;
2511
2512                 ret = ib_post_send(ch->qp, &wr, &bad_wr);
2513                 if (ret)
2514                         goto out;
2515         }
2516
2517 out:
2518         return ret;
2519 }
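
/*
 * Only the last work request of a data-out transfer is posted with
 * IB_SEND_SIGNALED (descriptive comment): this keeps the completion rate at
 * one event per RDMA read sequence while still letting srpt_completion()
 * detect the end of the transfer and return ioctx->n_rdma work-request
 * credits to ch->qp_wr_avail.
 */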
2520
2521 /*
2522  * Start data transfer between initiator and target. Must not block.
2523  */
2524 static int srpt_xfer_data(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
2525                           struct scst_cmd *scmnd)
2526 {
2527         int ret;
2528
2529         ret = srpt_map_sg_to_ib_sge(ch, ioctx, scmnd);
2530         if (ret) {
2531                 PRINT_ERROR("%s[%d] ret=%d", __func__, __LINE__, ret);
2532                 ret = SCST_TGT_RES_QUEUE_FULL;
2533                 goto out;
2534         }
2535
2536         ret = srpt_perform_rdmas(ch, ioctx, scst_cmd_get_data_direction(scmnd));
2537         if (ret) {
2538                 if (ret == -EAGAIN || ret == -ENOMEM) {
2539                         PRINT_INFO("%s[%d] queue full -- ret=%d",
2540                                    __func__, __LINE__, ret);
2541                         ret = SCST_TGT_RES_QUEUE_FULL;
2542                 } else {
2543                         PRINT_ERROR("%s[%d] fatal error -- ret=%d",
2544                                     __func__, __LINE__, ret);
2545                         ret = SCST_TGT_RES_FATAL_ERROR;
2546                 }
2547                 goto out_unmap;
2548         }
2549
2550         ret = SCST_TGT_RES_SUCCESS;
2551
2552 out:
2553         return ret;
2554 out_unmap:
2555         srpt_unmap_sg_to_ib_sge(ch, ioctx);
2556         goto out;
2557 }
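
/*
 * Return-value convention of srpt_xfer_data() (descriptive comment): errno
 * values from the mapping and posting helpers are translated into the SCST
 * result codes its callers expect: -EAGAIN and -ENOMEM become the retryable
 * SCST_TGT_RES_QUEUE_FULL, anything else becomes SCST_TGT_RES_FATAL_ERROR.
 */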
2558
2559 /*
2560  * Called by the SCST core to inform ib_srpt that data reception from the
2561  * initiator should start (SCST_DATA_WRITE). Must not block.
2562  */
2563 static int srpt_rdy_to_xfer(struct scst_cmd *scmnd)
2564 {
2565         struct srpt_rdma_ch *ch;
2566         struct srpt_ioctx *ioctx;
2567         enum rdma_ch_state ch_state;
2568         int ret;
2569
2570         ioctx = scst_cmd_get_tgt_priv(scmnd);
2571         BUG_ON(!ioctx);
2572
2573         WARN_ON(srpt_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA)
2574                 == SRPT_STATE_ABORTED);
2575
2576         ch = ioctx->ch;
2577         WARN_ON(ch != scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd)));
2578         BUG_ON(!ch);
2579
2580         ch_state = atomic_read(&ch->state);
2581         if (ch_state == RDMA_CHANNEL_DISCONNECTING) {
2582                 TRACE_DBG("cmd with tag %lld: channel disconnecting",
2583                           scst_cmd_get_tag(scmnd));
2584                 ret = SCST_TGT_RES_FATAL_ERROR;
2585                 goto out;
2586         } else if (ch_state == RDMA_CHANNEL_CONNECTING) {
2587                 ret = SCST_TGT_RES_QUEUE_FULL;
2588                 goto out;
2589         }
2590         ret = srpt_xfer_data(ch, ioctx, scmnd);
2591
2592 out:
2593         return ret;
2594 }
2595
2596 /**
2597  * srpt_must_wait_for_cred() - Check whether the target must postpone sending
2598  * a response to the initiator in order to avoid an initiator lockup. The
2599  * Linux SRP initiator locks up when initiator.req_lim < req_lim_min (2 for
2600  * SRP_CMD; 1 for SRP_TSK_MGMT) and either no new SRP_RSP is received or the
2601  * received SRP_RSP messages do not increase initiator.req_lim. To avoid such
2602  * a lockup the target must not send an SRP_RSP that keeps initiator.req_lim
2603  * below req_lim_min while initiator.req_lim < req_lim_min. Because of the
2604  * credit mechanism defined in the SRP standard, target.req_lim ==
2605  * req_lim_min - 1 implies initiator.req_lim == req_lim_min - 1. Hence wait
2606  * with sending a response if that response would not increase
2607  * initiator.req_lim.
2608  */
2609 static bool srpt_must_wait_for_cred(struct srpt_rdma_ch *ch, int req_lim_min)
2610 {
2611         int req_lim;
2612         req_lim = atomic_read(&ch->req_lim);
2613
2614         return req_lim < req_lim_min
2615                 && req_lim - atomic_read(&ch->last_response_req_lim) + 1 <= 0;
2616 }
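
/*
 * Worked example (illustrative numbers only): with req_lim_min == 2 for
 * SRP_CMD, suppose ch->req_lim == 1 and ch->last_response_req_lim == 2.
 * Then req_lim < req_lim_min and req_lim - last_response_req_lim + 1 == 0,
 * which is <= 0, so the response would not increase the initiator's credit
 * count and srpt_wait_for_cred() below keeps calling schedule() until
 * another response frees up a credit.
 */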
2617
2618 static void srpt_wait_for_cred(struct srpt_rdma_ch *ch, int req_lim_min)
2619 {
2620         while (unlikely(srpt_must_wait_for_cred(ch, req_lim_min)))
2621                 schedule();
2622 }
2623
2624 /**
2625  * srpt_xmit_response() - SCST callback function that transmits the response
2626  * to a SCSI command.
2627  *
2628  * Must not block.
2629  */
2630 static int srpt_xmit_response(struct scst_cmd *scmnd)
2631 {
2632         struct srpt_rdma_ch *ch;
2633         struct srpt_ioctx *ioctx;
2634         s32 req_lim_delta;
2635         int ret = SCST_TGT_RES_SUCCESS;
2636         int dir;
2637         int resp_len;
2638
2639         ioctx = scst_cmd_get_tgt_priv(scmnd);
2640         BUG_ON(!ioctx);
2641
2642         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2643         BUG_ON(!ch);
2644
2645         if (unlikely(scst_cmd_aborted(scmnd))) {
2646                 TRACE_DBG("cmd with tag %lld has been aborted",
2647                           scst_cmd_get_tag(scmnd));
2648                 srpt_abort_scst_cmd(ch->sport->sdev, scmnd);
2649                 ret = SCST_TGT_RES_SUCCESS;
2650                 goto out;
2651         }
2652
2653         if (unlikely(scst_cmd_atomic(scmnd))) {
2654                 TRACE_DBG("%s", "Switching to thread context.");
2655                 ret = SCST_TGT_RES_NEED_THREAD_CTX;
2656                 goto out;
2657         }
2658
2659         srpt_wait_for_cred(ch, 2);
2660
2661         if (srpt_set_cmd_state(ioctx, SRPT_STATE_PROCESSED)
2662             == SRPT_STATE_ABORTED) {
2663                 ret = SCST_TGT_RES_SUCCESS;
2664                 goto out;
2665         }
2666
2667         dir = scst_cmd_get_data_direction(scmnd);
2668
2669         /* For read commands, transfer the data to the initiator. */
2670         if (dir == SCST_DATA_READ && scst_cmd_get_resp_data_len(scmnd)) {
2671                 ret = srpt_xfer_data(ch, ioctx, scmnd);
2672                 if (ret != SCST_TGT_RES_SUCCESS) {
2673                         PRINT_ERROR("%s: tag= %lld xfer_data failed",
2674                                     __func__,
2675                                     (unsigned long long)
2676                                     scst_cmd_get_tag(scmnd));
2677                         goto out;
2678                 }
2679         }
2680
2681         scst_check_convert_sense(scmnd);
2682
2683         req_lim_delta = srpt_req_lim_delta(ch) + 1;
2684         resp_len = srpt_build_cmd_rsp(ch, ioctx, req_lim_delta,
2685                                       scst_cmd_get_tag(scmnd),
2686                                       scst_cmd_get_status(scmnd),
2687                                       scst_cmd_get_sense_buffer(scmnd),
2688                                       scst_cmd_get_sense_buffer_len(scmnd));
2689
2690         if (srpt_post_send(ch, ioctx, resp_len)) {
2691                 PRINT_ERROR("%s[%d]: ch->state= %d tag= %lld",
2692                             __func__, __LINE__, atomic_read(&ch->state),
2693                             (unsigned long long)scst_cmd_get_tag(scmnd));
2694                 atomic_sub(req_lim_delta, &ch->last_response_req_lim);
2695                 ret = SCST_TGT_RES_FATAL_ERROR;
2696         }
2697
2698 out:
2699         return ret;
2700 }
2701
2702 /**
2703  * srpt_tsk_mgmt_done() - SCST callback function that sends back the response
2704  * for a task management request.
2705  *
2706  * Must not block.
2707  */
2708 static void srpt_tsk_mgmt_done(struct scst_mgmt_cmd *mcmnd)
2709 {
2710         struct srpt_rdma_ch *ch;
2711         struct srpt_mgmt_ioctx *mgmt_ioctx;
2712         struct srpt_ioctx *ioctx;
2713         s32 req_lim_delta;
2714         int rsp_len;
2715
2716         mgmt_ioctx = scst_mgmt_cmd_get_tgt_priv(mcmnd);
2717         BUG_ON(!mgmt_ioctx);
2718
2719         ch = mgmt_ioctx->ch;
2720         BUG_ON(!ch);
2721
2722         ioctx = mgmt_ioctx->ioctx;
2723         BUG_ON(!ioctx);
2724
2725         TRACE_DBG("%s: tsk_mgmt_done for tag= %lld status=%d",
2726                   __func__, (unsigned long long)mgmt_ioctx->tag,
2727                   scst_mgmt_cmd_get_status(mcmnd));
2728
2729         WARN_ON(in_irq());
2730
2731         srpt_wait_for_cred(ch, 1);
2732
2733         if (srpt_set_cmd_state(ioctx, SRPT_STATE_PROCESSED)
2734             == SRPT_STATE_ABORTED)
2735                 goto out;
2736
2737         req_lim_delta = srpt_req_lim_delta(ch) + 1;
2738         rsp_len = srpt_build_tskmgmt_rsp(ch, ioctx, req_lim_delta,
2739                                          (scst_mgmt_cmd_get_status(mcmnd) ==
2740                                           SCST_MGMT_STATUS_SUCCESS) ?
2741                                          SRP_TSK_MGMT_SUCCESS :
2742                                          SRP_TSK_MGMT_FAILED,
2743                                          mgmt_ioctx->tag);
2744         if (srpt_post_send(ch, ioctx, rsp_len)) {
2745                 PRINT_ERROR("%s", "Sending SRP_RSP response failed.");
2746                 atomic_sub(req_lim_delta, &ch->last_response_req_lim);
2747         }
2748
2749         scst_mgmt_cmd_set_tgt_priv(mcmnd, NULL);
2750
2751         kfree(mgmt_ioctx);
2752
2753 out:
2754         ;
2755 }
2756
2757 /*
2758  * Called by the SCST core to inform ib_srpt that the command 'scmnd' is about
2759  * to be freed. May be called in IRQ context.
2760  */
2761 static void srpt_on_free_cmd(struct scst_cmd *scmnd)
2762 {
2763         struct srpt_rdma_ch *ch;
2764         struct srpt_ioctx *ioctx;
2765
2766         ioctx = scst_cmd_get_tgt_priv(scmnd);
2767         BUG_ON(!ioctx);
2768
2769         ch = ioctx->ch;
2770         BUG_ON(!ch);
2771
2772         scst_cmd_set_tgt_priv(scmnd, NULL);
2773         srpt_set_cmd_state(ioctx, SRPT_STATE_ABORTED);
2774         ioctx->scmnd = NULL;
2775         ioctx->ch = NULL;
2776         srpt_reset_ioctx(ch, ioctx);
2777 }
2778
2779 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && !defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2780 /* A vanilla 2.6.19 or older kernel without backported OFED kernel headers. */
2781 static void srpt_refresh_port_work(void *ctx)
2782 #else
2783 static void srpt_refresh_port_work(struct work_struct *work)
2784 #endif
2785 {
2786 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && !defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2787         struct srpt_port *sport = (struct srpt_port *)ctx;
2788 #else
2789         struct srpt_port *sport = container_of(work, struct srpt_port, work);
2790 #endif
2791
2792         srpt_refresh_port(sport);
2793 }
2794
2795 /*
2796  * Called by the SCST core to detect target adapters. Returns the number of
2797  * detected target adapters.
2798  */
2799 static int srpt_detect(struct scst_tgt_template *tp)
2800 {
2801         int device_count;
2802
2803         TRACE_ENTRY();
2804
2805         device_count = atomic_read(&srpt_device_count);
2806
2807         TRACE_EXIT_RES(device_count);
2808
2809         return device_count;
2810 }
2811
2812 /*
2813  * Callback function called by the SCST core from scst_unregister() to free up
2814  * the resources associated with device scst_tgt.
2815  */
2816 static int srpt_release(struct scst_tgt *scst_tgt)
2817 {
2818         struct srpt_device *sdev = scst_tgt_get_tgt_priv(scst_tgt);
2819         struct srpt_rdma_ch *ch, *tmp_ch;
2820
2821         TRACE_ENTRY();
2822
2823         BUG_ON(!scst_tgt);
2824 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
2825         WARN_ON(!sdev);
2826         if (!sdev)
2827                 return -ENODEV;
2828 #else
2829         if (WARN_ON(!sdev))
2830                 return -ENODEV;
2831 #endif
2832
2833 #ifdef CONFIG_SCST_PROC
2834         srpt_unregister_procfs_entry(scst_tgt->tgtt);
2835 #endif /*CONFIG_SCST_PROC*/
2836
2837         spin_lock_irq(&sdev->spinlock);
2838         list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
2839                 list_del(&ch->list);
2840                 atomic_set(&ch->state, RDMA_CHANNEL_DISCONNECTING);
2841                 spin_unlock_irq(&sdev->spinlock);
2842                 scst_unregister_session(ch->scst_sess, true,
2843                                         srpt_release_channel);
2844                 spin_lock_irq(&sdev->spinlock);
2845         }
2846         spin_unlock_irq(&sdev->spinlock);
2847
2848         scst_tgt_set_tgt_priv(scst_tgt, NULL);
2849
2850         TRACE_EXIT();
2851
2852         return 0;
2853 }
2854
2855 /*
2856  * Entry point for ib_srpt's kernel thread. This kernel thread is only created
2857  * when the module parameter 'thread' is not zero (the default is zero).
2858  * This thread processes the ioctx list srpt_thread.thread_ioctx_list.
2859  *
2860  * @pre thread != 0
2861  */
2862 static int srpt_ioctx_thread(void *arg)
2863 {
2864         struct srpt_ioctx *ioctx;
2865
2866         /* Hibernation / freezing of the SRPT kernel thread is not supported. */
2867         current->flags |= PF_NOFREEZE;
2868
2869         spin_lock_irq(&srpt_thread.thread_lock);
2870         while (!kthread_should_stop()) {
2871                 wait_queue_t wait;
2872                 init_waitqueue_entry(&wait, current);
2873
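                /*
                 * Classic open-coded wait: sleep, with thread_lock dropped,
                 * until srpt_test_ioctx_list() reports that I/O contexts
                 * have been queued.
                 */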
2874                 if (!srpt_test_ioctx_list()) {
2875                         add_wait_queue_exclusive(&ioctx_list_waitQ, &wait);
2876
2877                         for (;;) {
2878                                 set_current_state(TASK_INTERRUPTIBLE);
2879                                 if (srpt_test_ioctx_list())
2880                                         break;
2881                                 spin_unlock_irq(&srpt_thread.thread_lock);
2882                                 schedule();
2883                                 spin_lock_irq(&srpt_thread.thread_lock);
2884                         }
2885                         set_current_state(TASK_RUNNING);
2886                         remove_wait_queue(&ioctx_list_waitQ, &wait);
2887                 }
2888
2889                 while (!list_empty(&srpt_thread.thread_ioctx_list)) {
2890                         ioctx = list_entry(srpt_thread.thread_ioctx_list.next,
2891                                            struct srpt_ioctx, comp_list);
2892
2893                         list_del(&ioctx->comp_list);
2894
2895                         spin_unlock_irq(&srpt_thread.thread_lock);
2896                         switch (ioctx->op) {
2897                         case IB_WC_SEND:
2898                                 srpt_handle_send_comp(ioctx->ch, ioctx,
2899                                         SCST_CONTEXT_DIRECT);
2900                                 break;
2901                         case IB_WC_RDMA_WRITE:
2902                         case IB_WC_RDMA_READ:
2903                                 srpt_handle_rdma_comp(ioctx->ch, ioctx);
2904                                 break;
2905                         case IB_WC_RECV:
2906                                 srpt_handle_new_iu(ioctx->ch, ioctx);
2907                                 break;
2908                         default:
2909                                 PRINT_ERROR("received unrecognized WC opcode"
2910                                             " %d", ioctx->op);
2911                                 break;
2912                         }
2913 #if defined(CONFIG_SCST_DEBUG)
2914                         if (thread_processing_delay_in_us
2915                             <= MAX_UDELAY_MS * 1000)
2916                                 udelay(thread_processing_delay_in_us);
2917 #endif
2918                         spin_lock_irq(&srpt_thread.thread_lock);
2919                 }
2920         }
2921         spin_unlock_irq(&srpt_thread.thread_lock);
2922
2923         return 0;
2924 }
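
/*
 * Usage sketch (assumes the module has been installed): processing I/O
 * completions in the above kernel thread instead of in interrupt context
 * can be enabled via the module parameter 'thread', e.g.:
 *
 *   modprobe ib_srpt thread=1
 */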
2925
2926 /* SCST target template for the SRP target implementation. */
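/*
 * The .xmit_response_atomic and .rdy_to_xfer_atomic flags below tell the
 * SCST core that the xmit_response() and rdy_to_xfer() callbacks may be
 * invoked in atomic context.
 */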
2927 static struct scst_tgt_template srpt_template = {
2928         .name = DRV_NAME,
2929         .sg_tablesize = SRPT_DEF_SG_TABLESIZE,
2930         .xmit_response_atomic = 1,
2931         .rdy_to_xfer_atomic = 1,
2932         .detect = srpt_detect,
2933         .release = srpt_release,
2934         .xmit_response = srpt_xmit_response,
2935         .rdy_to_xfer = srpt_rdy_to_xfer,
2936         .on_free_cmd = srpt_on_free_cmd,
2937         .task_mgmt_fn_done = srpt_tsk_mgmt_done
2938 };
2939
2940 /*
2941  * The callback function srpt_release_class_dev() is called whenever a
2942  * device is removed from the /sys/class/infiniband_srpt device class.
2943  * Although this function is empty, a release function must be defined to
2944  * keep the driver core from complaining about a missing release function
2945  * upon module removal.
2946  */
2947 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2948 static void srpt_release_class_dev(struct class_device *class_dev)
2949 #else
2950 static void srpt_release_class_dev(struct device *dev)
2951 #endif
2952 {
2953 }
2954
2955 #ifdef CONFIG_SCST_PROC
2956
2957 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
2958 static int srpt_trace_level_show(struct seq_file *seq, void *v)
2959 {
2960         return scst_proc_log_entry_read(seq, trace_flag, NULL);
2961 }
2962
2963 static ssize_t srpt_proc_trace_level_write(struct file *file,
2964         const char __user *buf, size_t length, loff_t *off)
2965 {
2966         return scst_proc_log_entry_write(file, buf, length, &trace_flag,
2967                 DEFAULT_SRPT_TRACE_FLAGS, NULL);
2968 }
2969
2970 static struct scst_proc_data srpt_log_proc_data = {
2971         SCST_DEF_RW_SEQ_OP(srpt_proc_trace_level_write)
2972         .show = srpt_trace_level_show,
2973 };
2974 #endif
2975
2976 #endif /* CONFIG_SCST_PROC */
2977
2978 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2979 static ssize_t show_login_info(struct class_device *class_dev, char *buf)
2980 #else
2981 static ssize_t show_login_info(struct device *dev,
2982                                struct device_attribute *attr, char *buf)
2983 #endif
2984 {
2985         struct srpt_device *sdev =
2986 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2987                 container_of(class_dev, struct srpt_device, class_dev);
2988 #else
2989                 container_of(dev, struct srpt_device, dev);
2990 #endif
2991         struct srpt_port *sport;
2992         int i;
2993         int len = 0;
2994
2995         for (i = 0; i < sdev->device->phys_port_cnt; i++) {
2996                 sport = &sdev->port[i];
2997
2998                 len += sprintf(buf + len,
2999                                "tid_ext=%016llx,ioc_guid=%016llx,pkey=ffff,"
3000                                "dgid=%04x%04x%04x%04x%04x%04x%04x%04x,"
3001                                "service_id=%016llx\n",
3002                                (unsigned long long) srpt_service_guid,
3003                                (unsigned long long) srpt_service_guid,
3004                                be16_to_cpu(((__be16 *) sport->gid.raw)[0]),
3005                                be16_to_cpu(((__be16 *) sport->gid.raw)[1]),
3006                                be16_to_cpu(((__be16 *) sport->gid.raw)[2]),
3007                                be16_to_cpu(((__be16 *) sport->gid.raw)[3]),
3008                                be16_to_cpu(((__be16 *) sport->gid.raw)[4]),
3009                                be16_to_cpu(((__be16 *) sport->gid.raw)[5]),
3010                                be16_to_cpu(((__be16 *) sport->gid.raw)[6]),
3011                                be16_to_cpu(((__be16 *) sport->gid.raw)[7]),
3012                                (unsigned long long) srpt_service_guid);
3013         }
3014
3015         return len;
3016 }
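
/*
 * Illustrative example (hypothetical GUID values): for an HCA named "mlx4_0"
 * the login information could be read as follows:
 *
 *   $ cat /sys/class/infiniband_srpt/srpt-mlx4_0/login_info
 *   tid_ext=0002c903000e8acc,ioc_guid=0002c903000e8acc,pkey=ffff,dgid=fe800000000000000002c903000e8acd,service_id=0002c903000e8acc
 *
 * An SRP initiator can use such a string as target login information, e.g.
 * by writing it into the ib_srp "add_target" sysfs attribute.
 */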
3017
3018 static struct class_attribute srpt_class_attrs[] = {
3019         __ATTR_NULL,
3020 };
3021
3022 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
3023 static struct class_device_attribute srpt_dev_attrs[] = {
3024 #else
3025 static struct device_attribute srpt_dev_attrs[] = {
3026 #endif
3027         __ATTR(login_info, S_IRUGO, show_login_info, NULL),
3028         __ATTR_NULL,
3029 };
3030
3031 static struct class srpt_class = {
3032         .name        = "infiniband_srpt",
3033 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
3034         .release = srpt_release_class_dev,
3035 #else
3036         .dev_release = srpt_release_class_dev,
3037 #endif
3038         .class_attrs = srpt_class_attrs,
3039 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
3040         .class_dev_attrs = srpt_dev_attrs,
3041 #else
3042         .dev_attrs   = srpt_dev_attrs,
3043 #endif
3044 };
3045
3046 /*
3047  * Callback function invoked by the InfiniBand core when a new InfiniBand
3048  * device is added, and also by ib_register_client() once for each
3049  * InfiniBand device that is already present.
3050  */
3051 static void srpt_add_one(struct ib_device *device)
3052 {
3053         struct srpt_device *sdev;
3054         struct srpt_port *sport;
3055         struct ib_srq_init_attr srq_attr;
3056         int i;
3057
3058         TRACE_ENTRY();
3059
3060         TRACE_DBG("device = %p, device->dma_ops = %p", device, device->dma_ops);
3061
3062         sdev = kzalloc(sizeof *sdev, GFP_KERNEL);
3063         if (!sdev)
3064                 return;
3065
3066         sdev->device = device;
3067
3068 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
3069         sdev->class_dev.class = &srpt_class;
3070         sdev->class_dev.dev = device->dma_device;
3071         snprintf(sdev->class_dev.class_id, BUS_ID_SIZE,
3072                  "srpt-%s", device->name);
3073 #else
3074         sdev->dev.class = &srpt_class;
3075         sdev->dev.parent = device->dma_device;
3076 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30)
3077         snprintf(sdev->dev.bus_id, BUS_ID_SIZE, "srpt-%s", device->name);
3078 #else
3079         dev_set_name(&sdev->dev, "srpt-%s", device->name);
3080 #endif
3081 #endif
3082
3083 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
3084         if (class_device_register(&sdev->class_dev))
3085                 goto free_dev;
3086 #else
3087         if (device_register(&sdev->dev))
3088                 goto free_dev;
3089 #endif
3090
3091         if (ib_query_device(device, &sdev->dev_attr))
3092                 goto err_dev;
3093
3094         sdev->pd = ib_alloc_pd(device);
3095         if (IS_ERR(sdev->pd))
3096                 goto err_dev;
3097
3098         sdev->mr = ib_get_dma_mr(sdev->pd, IB_ACCESS_LOCAL_WRITE);
3099         if (IS_ERR(sdev->mr))
3100                 goto err_pd;
3101
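        /*
         * Set up a shared receive queue (SRQ) that will be used by all
         * connections; its size is capped at what the HCA supports.
         */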
3102         srq_attr.event_handler = srpt_srq_event;
3103         srq_attr.srq_context = (void *)sdev;
3104         srq_attr.attr.max_wr = min(SRPT_SRQ_SIZE, sdev->dev_attr.max_srq_wr);
3105         srq_attr.attr.max_sge = 1;
3106         srq_attr.attr.srq_limit = 0;
3107
3108         sdev->srq = ib_create_srq(sdev->pd, &srq_attr);
3109         if (IS_ERR(sdev->srq))
3110                 goto err_mr;
3111
3112         TRACE_DBG("%s: create SRQ #wr=%d max_allow=%d dev=%s",
3113                   __func__, srq_attr.attr.max_wr,
3114                   sdev->dev_attr.max_srq_wr, device->name);
3115
3116         if (!srpt_service_guid)
3117                 srpt_service_guid = be64_to_cpu(device->node_guid);
3118
3119         sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev);
3120         if (IS_ERR(sdev->cm_id))
3121                 goto err_srq;
3122
3123         /* print out target login information */
3124         TRACE_DBG("Target login info: id_ext=%016llx,"
3125                   "ioc_guid=%016llx,pkey=ffff,service_id=%016llx",
3126                   (unsigned long long) srpt_service_guid,
3127                   (unsigned long long) srpt_service_guid,
3128                   (unsigned long long) srpt_service_guid);
3129
3130         /*
3131          * We do not have a consistent service_id (i.e. the id_ext of the
3132          * target_id) to identify this target. We currently use the GUID of the
3133          * first HCA in the system as the service_id; therefore, the target_id
3134          * will change if this HCA fails and is replaced by a different HCA.
3135          */
3136         if (ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid), 0, NULL))
3137                 goto err_cm;
3138
3139         INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device,
3140                               srpt_event_handler);
3141         if (ib_register_event_handler(&sdev->event_handler))
3142                 goto err_cm;
3143
3144         if (srpt_alloc_ioctx_ring(sdev, sdev->ioctx_ring,
3145                                   ARRAY_SIZE(sdev->ioctx_ring), 0))
3146                 goto err_event;
3147
3148         INIT_LIST_HEAD(&sdev->rch_list);
3149         spin_lock_init(&sdev->spinlock);
3150
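        /*
         * Pre-post one receive work request for every SRQ entry such that
         * incoming information units can be received immediately.
         */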
3151         for (i = 0; i < SRPT_SRQ_SIZE; ++i)
3152                 srpt_post_recv(sdev, sdev->ioctx_ring[i]);
3153
3154         ib_set_client_data(device, &srpt_client, sdev);
3155
3156         sdev->scst_tgt = scst_register(&srpt_template, NULL);
3157         if (!sdev->scst_tgt) {
3158                 PRINT_ERROR("SCST registration failed for %s.",
3159                             sdev->device->name);
3160                 goto err_ring;
3161         }
3162
3163         scst_tgt_set_tgt_priv(sdev->scst_tgt, sdev);
3164
3165         WARN_ON(sdev->device->phys_port_cnt > ARRAY_SIZE(sdev->port));
3167
3168         for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
3169                 sport = &sdev->port[i - 1];
3170                 sport->sdev = sdev;
3171                 sport->port = i;
3172 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
3173                 /*
3174                  * A vanilla 2.6.19 or older kernel without backported OFED
3175                  * kernel headers.
3176                  */
3177                 INIT_WORK(&sport->work, srpt_refresh_port_work, sport);
3178 #else
3179                 INIT_WORK(&sport->work, srpt_refresh_port_work);
3180 #endif
3181                 if (srpt_refresh_port(sport)) {
3182                         PRINT_ERROR("MAD registration failed for %s-%d.",
3183                                     sdev->device->name, i);
3184                         goto err_refresh_port;
3185                 }
3186         }
3187
3188         atomic_inc(&srpt_device_count);
3189
3190         TRACE_EXIT();
3191
3192         return;
3193
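/* Error unwinding: undo the initialization steps above in reverse order. */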
3194 err_refresh_port:
3195         scst_unregister(sdev->scst_tgt);
3196 err_ring:
3197         ib_set_client_data(device, &srpt_client, NULL);
3198         srpt_free_ioctx_ring(sdev, sdev->ioctx_ring,
3199                              ARRAY_SIZE(sdev->ioctx_ring));
3200 err_event:
3201         ib_unregister_event_handler(&sdev->event_handler);
3202 err_cm:
3203         ib_destroy_cm_id(sdev->cm_id);
3204 err_srq:
3205         ib_destroy_srq(sdev->srq);
3206 err_mr:
3207         ib_dereg_mr(sdev->mr);
3208 err_pd:
3209         ib_dealloc_pd(sdev->pd);
3210 err_dev:
3211 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
3212         class_device_unregister(&sdev->class_dev);
3213 #else
3214         device_unregister(&sdev->dev);
3215 #endif
3216 free_dev:
3217         kfree(sdev);
3218
3219         TRACE_EXIT();
3220 }
3221
3222 /*
3223  * Callback function invoked by the InfiniBand core when an InfiniBand
3224  * device is removed, and also by ib_unregister_client() once for each
3225  * registered InfiniBand device.
3226  */
3227 static void srpt_remove_one(struct ib_device *device)
3228 {
3229         int i;
3230         struct srpt_device *sdev;
3231
3232         TRACE_ENTRY();
3233
3234         sdev = ib_get_client_data(device, &srpt_client);
3235 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
3236         WARN_ON(!sdev);
3237         if (!sdev)
3238                 return;
3239 #else
3240         if (WARN_ON(!sdev))
3241                 return;
3242 #endif
3243
3244         srpt_unregister_mad_agent(sdev);
3245
3246         ib_unregister_event_handler(&sdev->event_handler);
3247
3248         /* Cancel any work queued by the just unregistered IB event handler. */
3249         for (i = 0; i < sdev->device->phys_port_cnt; i++)
3250 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 22)
3251                 cancel_work_sync(&sdev->port[i].work);
3252 #else
3253                 /*
3254                  * cancel_work_sync() was introduced in kernel 2.6.22. Older
3255                  * kernels do not have a facility to cancel scheduled work, so
3256                  * wait until all work scheduled via schedule_work() has finished.
3257                  */
3258                 flush_scheduled_work();
3259 #endif
3260
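        /*
         * Destroy the IB resources in the reverse order of their creation
         * in srpt_add_one().
         */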
3261         ib_destroy_cm_id(sdev->cm_id);
3262         ib_destroy_srq(sdev->srq);
3263         ib_dereg_mr(sdev->mr);
3264         ib_dealloc_pd(sdev->pd);
3265
3266 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
3267         class_device_unregister(&sdev->class_dev);
3268 #else
3269         device_unregister(&sdev->dev);
3270 #endif
3271
3272         /*
3273          * Unregistering an SCST target must happen after destroying sdev->cm_id
3274          * such that no new SRP_LOGIN_REQ information units can arrive while
3275          * destroying the SCST target.
3276          */
3277         scst_unregister(sdev->scst_tgt);
3278         sdev->scst_tgt = NULL;
3279
3280         srpt_free_ioctx_ring(sdev, sdev->ioctx_ring,
3281                              ARRAY_SIZE(sdev->ioctx_ring));
3282         kfree(sdev);
3283
3284         TRACE_EXIT();
3285 }
3286
3287 #ifdef CONFIG_SCST_PROC
3288
3289 /**
3290  * Create procfs entries for srpt. Currently the only procfs entry created
3291  * by this function is the "trace_level" entry.
3292  */
3293 static int srpt_register_procfs_entry(struct scst_tgt_template *tgt)
3294 {
3295         int res = 0;
3296 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
3297         struct proc_dir_entry *p, *root;
3298
3299         root = scst_proc_get_tgt_root(tgt);
3300         WARN_ON(!root);
3301         if (root) {
3302                 /*
3303                  * Fill in the scst_proc_data::data pointer, which is used in
3304                  * a printk(KERN_INFO ...) statement in
3305                  * scst_proc_log_entry_write() in scst_proc.c.
3306                  */
3307                 srpt_log_proc_data.data = (char *)tgt->name;
3308                 p = scst_create_proc_entry(root, SRPT_PROC_TRACE_LEVEL_NAME,
3309                                            &srpt_log_proc_data);
3310                 if (!p)
3311                         res = -ENOMEM;
3312         } else
3313                 res = -ENOMEM;
3314
3315 #endif
3316         return res;
3317 }
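
/*
 * Usage sketch (the exact command syntax is defined by the SCST core): once
 * the "trace_level" entry has been created, the trace level can be inspected
 * and changed at runtime, e.g.:
 *
 *   cat /proc/scsi_tgt/ib_srpt/trace_level
 *   echo "add mgmt" >/proc/scsi_tgt/ib_srpt/trace_level
 */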
3318
3319 static void srpt_unregister_procfs_entry(struct scst_tgt_template *tgt)
3320 {
3321 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
3322         struct proc_dir_entry *root;
3323
3324         root = scst_proc_get_tgt_root(tgt);
3325         WARN_ON(!root);
3326         if (root)
3327                 remove_proc_entry(SRPT_PROC_TRACE_LEVEL_NAME, root);
3328 #endif
3329 }
3330
3331 #endif /*CONFIG_SCST_PROC*/
3332
3333 /*
3334  * Module initialization.
3335  *
3336  * Note: since ib_register_client() registers callback functions, and since at
3337  * least one of these callback functions (srpt_add_one()) calls SCST functions,
3338  * the SCST target template must be registered before ib_register_client() is
3339  * called.
3340  */
3341 static int __init srpt_init_module(void)
3342 {
3343         int ret;
3344
3345         ret = -EINVAL;
3346         if (srp_max_message_size < MIN_MAX_MESSAGE_SIZE) {
3347                 PRINT_ERROR("invalid value %d for kernel module parameter"
3348                             " srp_max_message_size -- must be at least %d.",
3349                             srp_max_message_size,
3350                             MIN_MAX_MESSAGE_SIZE);
3351                 goto out;
3352         }
3353
3354         ret = class_register(&srpt_class);
3355         if (ret) {
3356                 PRINT_ERROR("%s", "couldn't register the infiniband_srpt class");
3357                 goto out;
3358         }
3359
3360         ret = scst_register_target_template(&srpt_template);
3361         if (ret < 0) {
3362                 PRINT_ERROR("%s", "couldn't register with SCST");
3363                 ret = -ENODEV;
3364                 goto out_unregister_class;
3365         }
3366
3367 #ifdef CONFIG_SCST_PROC
3368         ret = srpt_register_procfs_entry(&srpt_template);
3369         if (ret) {
3370                 PRINT_ERROR("%s", "couldn't register procfs entry");
3371                 goto out_unregister_target;
3372         }
3373 #endif /*CONFIG_SCST_PROC*/
3374
3375         ret = ib_register_client(&srpt_client);
3376         if (ret) {
3377                 PRINT_ERROR("%s", "couldn't register IB client");
3378                 goto out_unregister_target;
3379         }
3380
3381         if (thread) {
3382                 spin_lock_init(&srpt_thread.thread_lock);
3383                 INIT_LIST_HEAD(&srpt_thread.thread_ioctx_list);
3384                 srpt_thread.thread = kthread_run(srpt_ioctx_thread,
3385                                                  NULL, "srpt_thread");
3386                 if (IS_ERR(srpt_thread.thread)) {
3387                         srpt_thread.thread = NULL;
3388                         thread = 0;
3389                 }
3390         }
3391
3392         return 0;
3393
3394 out_unregister_target:
3395 #ifdef CONFIG_SCST_PROC
3396         /*
3397          * Note: the procfs entry is unregistered in srpt_release(), which is
3398          * called by scst_unregister_target_template().
3399          */
3400 #endif /*CONFIG_SCST_PROC*/
3401         scst_unregister_target_template(&srpt_template);
3402 out_unregister_class:
3403         class_unregister(&srpt_class);
3404 out:
3405         return ret;
3406 }
3407
3408 static void __exit srpt_cleanup_module(void)
3409 {
3410         TRACE_ENTRY();
3411
3412         ib_unregister_client(&srpt_client);
3413         scst_unregister_target_template(&srpt_template);
3414         if (srpt_thread.thread)
3415                 kthread_stop(srpt_thread.thread);
3416         class_unregister(&srpt_class);
3417
3418         TRACE_EXIT();
3419 }
3420
3421 module_init(srpt_init_module);
3422 module_exit(srpt_cleanup_module);