srpt/src/ib_srpt.c
/*
 * Copyright (c) 2006 - 2009 Mellanox Technology Inc.  All rights reserved.
 * Copyright (C) 2008 Vladislav Bolkhovitin <vst@vlnb.net>
 * Copyright (C) 2008 - 2009 Bart Van Assche <bart.vanassche@gmail.com>
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#include <asm/atomic.h>
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#endif
#include "ib_srpt.h"
#include "scst_debug.h"

#define CONFIG_SCST_PROC

/* Name of this kernel module. */
#define DRV_NAME                "ib_srpt"
/* Prefix for printk() kernel messages. */
#define LOG_PFX                 DRV_NAME ": "
#define DRV_VERSION             "1.0.1"
#define DRV_RELDATE             "July 10, 2008"
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
/* Flags to be used in SCST debug tracing statements. */
#define DEFAULT_SRPT_TRACE_FLAGS (TRACE_OUT_OF_MEM | TRACE_MINOR \
                                  | TRACE_MGMT | TRACE_SPECIAL)
/* Name of the entry that will be created under /proc/scsi_tgt/ib_srpt. */
#define SRPT_PROC_TRACE_LEVEL_NAME      "trace_level"
#endif

#define MELLANOX_SRPT_ID_STRING "SCST SRP target"

MODULE_AUTHOR("Vu Pham");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol target "
                   "v" DRV_VERSION " (" DRV_RELDATE ")");
MODULE_LICENSE("Dual BSD/GPL");

struct srpt_thread {
        /* Protects thread_ioctx_list. */
        spinlock_t thread_lock;
        /* I/O contexts to be processed by the kernel thread. */
        struct list_head thread_ioctx_list;
        /* SRPT kernel thread. */
        struct task_struct *thread;
};

/*
 * Global Variables
 */

static u64 srpt_service_guid;
/* Number of srpt_device structures. */
static atomic_t srpt_device_count;
static int use_port_guid_in_session_name;
static int thread;
static struct srpt_thread srpt_thread;
static DECLARE_WAIT_QUEUE_HEAD(ioctx_list_waitQ);
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
static unsigned long trace_flag = DEFAULT_SRPT_TRACE_FLAGS;
module_param(trace_flag, long, 0644);
MODULE_PARM_DESC(trace_flag,
                 "Trace flags for the ib_srpt kernel module.");
#endif
#if defined(CONFIG_SCST_DEBUG)
static unsigned long interrupt_processing_delay_in_us;
module_param(interrupt_processing_delay_in_us, long, 0744);
MODULE_PARM_DESC(interrupt_processing_delay_in_us,
                 "CQ completion handler interrupt delay in microseconds.");
static unsigned long thread_processing_delay_in_us;
module_param(thread_processing_delay_in_us, long, 0744);
MODULE_PARM_DESC(thread_processing_delay_in_us,
                 "SRP thread processing delay in microseconds.");
#endif

module_param(thread, int, 0444);
MODULE_PARM_DESC(thread,
                 "Process ioctx in kernel thread context if 1; in soft IRQ "
                 "context, where possible, if 0 (default).");

static unsigned int srp_max_rdma_size = 65536;
module_param(srp_max_rdma_size, int, 0744);
MODULE_PARM_DESC(srp_max_rdma_size,
                 "Maximum size of SRP RDMA transfers for new connections.");

module_param(use_port_guid_in_session_name, bool, 0444);
MODULE_PARM_DESC(use_port_guid_in_session_name,
                 "Use target port ID in the SCST session name such that"
                 " redundant paths between multiport systems can be masked.");

static void srpt_add_one(struct ib_device *device);
static void srpt_remove_one(struct ib_device *device);
static void srpt_unregister_mad_agent(struct srpt_device *sdev);
#ifdef CONFIG_SCST_PROC
static void srpt_unregister_procfs_entry(struct scst_tgt_template *tgt);
#endif /*CONFIG_SCST_PROC*/

static struct ib_client srpt_client = {
        .name = DRV_NAME,
        .add = srpt_add_one,
        .remove = srpt_remove_one
};

/**
 * Atomically test and set the channel state.
 * @ch: RDMA channel.
 * @old: channel state to compare with.
 * @new: state to change the channel state to if the current state matches the
 *       argument 'old'.
 *
 * Returns true if the channel state matched old upon entry of this function,
 * and false otherwise.
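 *
 * Example: srpt_qp_event() below relies on this function to change the
 * channel state from RDMA_CHANNEL_LIVE to RDMA_CHANNEL_DISCONNECTING exactly
 * once, so that only a single DREQ is sent per channel.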
 */
static bool srpt_test_and_set_channel_state(struct srpt_rdma_ch *ch,
                                            enum rdma_ch_state old,
                                            enum rdma_ch_state new)
{
        unsigned long flags;
        enum rdma_ch_state cur;

        spin_lock_irqsave(&ch->spinlock, flags);
        cur = ch->state;
        if (cur == old)
                ch->state = new;
        spin_unlock_irqrestore(&ch->spinlock, flags);

        return cur == old;
}

/*
 * Callback function called by the InfiniBand core when an asynchronous IB
 * event occurs. This callback may occur in interrupt context. See also
 * section 11.5.2, Set Asynchronous Event Handler in the InfiniBand
 * Architecture Specification.
 */
static void srpt_event_handler(struct ib_event_handler *handler,
                               struct ib_event *event)
{
        struct srpt_device *sdev;
        struct srpt_port *sport;

        TRACE_ENTRY();

        sdev = ib_get_client_data(event->device, &srpt_client);
        if (!sdev || sdev->device != event->device)
                return;

        TRACE_DBG("ASYNC event= %d on device= %s",
                  event->event, sdev->device->name);

        switch (event->event) {
        case IB_EVENT_PORT_ERR:
                if (event->element.port_num <= sdev->device->phys_port_cnt) {
                        sport = &sdev->port[event->element.port_num - 1];
                        sport->lid = 0;
                        sport->sm_lid = 0;
                }
                break;
        case IB_EVENT_PORT_ACTIVE:
        case IB_EVENT_LID_CHANGE:
        case IB_EVENT_PKEY_CHANGE:
        case IB_EVENT_SM_CHANGE:
        case IB_EVENT_CLIENT_REREGISTER:
                /*
                 * Refresh port data asynchronously. Note: it is safe to call
                 * schedule_work() even if &sport->work is already on the
                 * global workqueue because schedule_work() tests for the
                 * work_pending() condition before adding &sport->work to the
                 * global work queue.
                 */
                if (event->element.port_num <= sdev->device->phys_port_cnt) {
                        sport = &sdev->port[event->element.port_num - 1];
                        if (!sport->lid && !sport->sm_lid)
                                schedule_work(&sport->work);
                }
                break;
        default:
                break;
        }

        TRACE_EXIT();
}

/*
 * Callback function called by the InfiniBand core for SRQ (shared receive
 * queue) events.
 */
static void srpt_srq_event(struct ib_event *event, void *ctx)
{
        TRACE_ENTRY();

        TRACE_DBG("SRQ event %d", event->event);

        TRACE_EXIT();
}

/*
 * Callback function called by the InfiniBand core for QP (queue pair) events.
 */
static void srpt_qp_event(struct ib_event *event, void *ctx)
{
        struct srpt_rdma_ch *ch = ctx;

        TRACE_DBG("QP event %d on cm_id=%p sess_name=%s state=%d",
                  event->event, ch->cm_id, ch->sess_name, ch->state);

        switch (event->event) {
        case IB_EVENT_COMM_EST:
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20) || defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
                ib_cm_notify(ch->cm_id, event->event);
#else
                /* Vanilla 2.6.19 kernel (or before) without OFED. */
                PRINT_ERROR("%s", "ib_cm_notify() is not available on"
                            " vanilla kernels older than 2.6.20 without"
                            " OFED.");
#endif
                break;
        case IB_EVENT_QP_LAST_WQE_REACHED:
                if (srpt_test_and_set_channel_state(ch, RDMA_CHANNEL_LIVE,
                                        RDMA_CHANNEL_DISCONNECTING)) {
                        PRINT_INFO("disconnected session %s.", ch->sess_name);
                        ib_send_cm_dreq(ch->cm_id, NULL, 0);
                }
                break;
        default:
                break;
        }
}

/*
 * Helper function for filling in an InfiniBand IOUnitInfo structure. Copies
 * the lowest four bits of value in element slot of the array of four bit
 * elements called c_list (controller list). The index slot is one-based.
 *
 * @pre 1 <= slot && 0 <= value && value < 16
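 *
 * Example: srpt_set_ioc(c_list, 1, 1) stores the value 1 in the upper four
 * bits of c_list[0] and srpt_set_ioc(c_list, 2, 1) stores the value 1 in the
 * lower four bits of c_list[0].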
 */
static void srpt_set_ioc(u8 *c_list, u32 slot, u8 value)
{
        u16 id;
        u8 tmp;

        id = (slot - 1) / 2;
        if (slot & 0x1) {
                tmp = c_list[id] & 0xf;
                c_list[id] = (value << 4) | tmp;
        } else {
                tmp = c_list[id] & 0xf0;
                c_list[id] = (value & 0xf) | tmp;
        }
}

/*
 * Write InfiniBand ClassPortInfo to mad. See also section 16.3.3.1
 * ClassPortInfo in the InfiniBand Architecture Specification.
 */
static void srpt_get_class_port_info(struct ib_dm_mad *mad)
{
        struct ib_class_port_info *cif;

        cif = (struct ib_class_port_info *)mad->data;
        memset(cif, 0, sizeof *cif);
        cif->base_version = 1;
        cif->class_version = 1;
        cif->resp_time_value = 20;

        mad->mad_hdr.status = 0;
}

/*
 * Write IOUnitInfo to mad. See also section 16.3.3.3 IOUnitInfo in the
 * InfiniBand Architecture Specification. See also section B.7,
 * table B.6 in the T10 SRP r16a document.
 */
static void srpt_get_iou(struct ib_dm_mad *mad)
{
        struct ib_dm_iou_info *ioui;
        u8 slot;
        int i;

        ioui = (struct ib_dm_iou_info *)mad->data;
        ioui->change_id = 1;
        ioui->max_controllers = 16;

        /* set present for slot 1 and empty for the rest */
        srpt_set_ioc(ioui->controller_list, 1, 1);
        for (i = 1, slot = 2; i < 16; i++, slot++)
                srpt_set_ioc(ioui->controller_list, slot, 0);

        mad->mad_hdr.status = 0;
}

/*
 * Write IOControllerProfile to mad for I/O controller (sdev, slot). See also
 * section 16.3.3.4 IOControllerProfile in the InfiniBand Architecture
 * Specification. See also section B.7, table B.7 in the T10 SRP r16a
 * document.
 */
static void srpt_get_ioc(struct srpt_device *sdev, u32 slot,
                         struct ib_dm_mad *mad)
{
        struct ib_dm_ioc_profile *iocp;

        iocp = (struct ib_dm_ioc_profile *)mad->data;

        if (!slot || slot > 16) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
                return;
        }

        if (slot > 2) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
                return;
        }

        memset(iocp, 0, sizeof *iocp);
        strcpy(iocp->id_string, MELLANOX_SRPT_ID_STRING);
        iocp->guid = cpu_to_be64(srpt_service_guid);
        iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
        iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id);
        iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver);
        iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
        iocp->subsys_device_id = 0x0;
        iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
        iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
        iocp->protocol = cpu_to_be16(SRP_PROTOCOL);
        iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION);
        iocp->send_queue_depth = cpu_to_be16(SRPT_SRQ_SIZE);
        iocp->rdma_read_depth = 4;
        iocp->send_size = cpu_to_be32(MAX_MESSAGE_SIZE);
        iocp->rdma_size = cpu_to_be32(min(max(srp_max_rdma_size, 256U),
                                          1U << 24));
        iocp->num_svc_entries = 1;
        iocp->op_cap_mask = SRP_SEND_TO_IOC | SRP_SEND_FROM_IOC |
                SRP_RDMA_READ_FROM_IOC | SRP_RDMA_WRITE_FROM_IOC;

        mad->mad_hdr.status = 0;
}

/*
 * Device management: write ServiceEntries to mad for the given slot. See also
 * section 16.3.3.5 ServiceEntries in the InfiniBand Architecture
 * Specification. See also section B.7, table B.8 in the T10 SRP r16a document.
 */
static void srpt_get_svc_entries(u64 ioc_guid,
                                 u16 slot, u8 hi, u8 lo, struct ib_dm_mad *mad)
{
        struct ib_dm_svc_entries *svc_entries;

        WARN_ON(!ioc_guid);

        if (!slot || slot > 16) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
                return;
        }

        if (slot > 2 || lo > hi || hi > 1) {
                mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
                return;
        }

        svc_entries = (struct ib_dm_svc_entries *)mad->data;
        memset(svc_entries, 0, sizeof *svc_entries);
        svc_entries->service_entries[0].id = cpu_to_be64(ioc_guid);
        snprintf(svc_entries->service_entries[0].name,
                 sizeof(svc_entries->service_entries[0].name),
                 "%s%016llx",
                 SRP_SERVICE_NAME_PREFIX,
                 (unsigned long long)ioc_guid);

        mad->mad_hdr.status = 0;
}

/*
 * Actual processing of a MAD *rq_mad received through source port *sp
 * (MAD = InfiniBand management datagram). The response to be sent back is
 * written to *rsp_mad.
 */
static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
                                 struct ib_dm_mad *rsp_mad)
{
        u16 attr_id;
        u32 slot;
        u8 hi, lo;

        attr_id = be16_to_cpu(rq_mad->mad_hdr.attr_id);
        switch (attr_id) {
        case DM_ATTR_CLASS_PORT_INFO:
                srpt_get_class_port_info(rsp_mad);
                break;
        case DM_ATTR_IOU_INFO:
                srpt_get_iou(rsp_mad);
                break;
        case DM_ATTR_IOC_PROFILE:
                slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
                srpt_get_ioc(sp->sdev, slot, rsp_mad);
                break;
        case DM_ATTR_SVC_ENTRIES:
                slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
                hi = (u8) ((slot >> 8) & 0xff);
                lo = (u8) (slot & 0xff);
                slot = (u16) ((slot >> 16) & 0xffff);
                srpt_get_svc_entries(srpt_service_guid,
                                     slot, hi, lo, rsp_mad);
                break;
        default:
                rsp_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
                break;
        }
}

/*
 * Callback function that is called by the InfiniBand core after transmission of
 * a MAD. (MAD = management datagram; AH = address handle.)
 */
static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
                                  struct ib_mad_send_wc *mad_wc)
{
        ib_destroy_ah(mad_wc->send_buf->ah);
        ib_free_send_mad(mad_wc->send_buf);
}

/*
 * Callback function that is called by the InfiniBand core after reception of
 * a MAD (management datagram).
 */
static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
                                  struct ib_mad_recv_wc *mad_wc)
{
        struct srpt_port *sport = (struct srpt_port *)mad_agent->context;
        struct ib_ah *ah;
        struct ib_mad_send_buf *rsp;
        struct ib_dm_mad *dm_mad;

        if (!mad_wc || !mad_wc->recv_buf.mad)
                return;

        ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc,
                                  mad_wc->recv_buf.grh, mad_agent->port_num);
        if (IS_ERR(ah))
                goto err;

        BUILD_BUG_ON(offsetof(struct ib_dm_mad, data) != IB_MGMT_DEVICE_HDR);

        rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp,
                                 mad_wc->wc->pkey_index, 0,
                                 IB_MGMT_DEVICE_HDR, IB_MGMT_DEVICE_DATA,
                                 GFP_KERNEL);
        if (IS_ERR(rsp))
                goto err_rsp;

        rsp->ah = ah;

        dm_mad = rsp->mad;
        memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof *dm_mad);
        dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
        dm_mad->mad_hdr.status = 0;

        switch (mad_wc->recv_buf.mad->mad_hdr.method) {
        case IB_MGMT_METHOD_GET:
                srpt_mgmt_method_get(sport, mad_wc->recv_buf.mad, dm_mad);
                break;
        case IB_MGMT_METHOD_SET:
                dm_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
                break;
        default:
                dm_mad->mad_hdr.status =
                    cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD);
                break;
        }

        if (!ib_post_send_mad(rsp, NULL)) {
                ib_free_recv_mad(mad_wc);
                /* will destroy_ah & free_send_mad in send completion */
                return;
        }

        ib_free_send_mad(rsp);

err_rsp:
        ib_destroy_ah(ah);
err:
        ib_free_recv_mad(mad_wc);
}

/*
 * Enable InfiniBand management datagram processing, update the cached sm_lid,
 * lid and gid values, and register a callback function for processing MADs
 * on the specified port. It is safe to call this function more than once for
 * the same port.
 */
static int srpt_refresh_port(struct srpt_port *sport)
{
        struct ib_mad_reg_req reg_req;
        struct ib_port_modify port_modify;
        struct ib_port_attr port_attr;
        int ret;

        TRACE_ENTRY();

        memset(&port_modify, 0, sizeof port_modify);
        port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
        port_modify.clr_port_cap_mask = 0;

        ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
        if (ret)
                goto err_mod_port;

        ret = ib_query_port(sport->sdev->device, sport->port, &port_attr);
        if (ret)
                goto err_query_port;

        sport->sm_lid = port_attr.sm_lid;
        sport->lid = port_attr.lid;

        ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid);
        if (ret)
                goto err_query_port;

        if (!sport->mad_agent) {
                memset(&reg_req, 0, sizeof reg_req);
                reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
                reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION;
                set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
                set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);

                sport->mad_agent = ib_register_mad_agent(sport->sdev->device,
                                                         sport->port,
                                                         IB_QPT_GSI,
                                                         &reg_req, 0,
                                                         srpt_mad_send_handler,
                                                         srpt_mad_recv_handler,
                                                         sport);
                if (IS_ERR(sport->mad_agent)) {
                        ret = PTR_ERR(sport->mad_agent);
                        sport->mad_agent = NULL;
                        goto err_query_port;
                }
        }

        TRACE_EXIT_RES(0);

        return 0;

err_query_port:

        port_modify.set_port_cap_mask = 0;
        port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
        ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);

err_mod_port:

        TRACE_EXIT_RES(ret);

        return ret;
}

/*
 * Unregister the callback function for processing MADs and disable MAD
 * processing for all ports of the specified device. It is safe to call this
 * function more than once for the same device.
 */
static void srpt_unregister_mad_agent(struct srpt_device *sdev)
{
        struct ib_port_modify port_modify = {
                .clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP,
        };
        struct srpt_port *sport;
        int i;

        for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
                sport = &sdev->port[i - 1];
                WARN_ON(sport->port != i);
                if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0)
                        PRINT_ERROR("%s", "disabling MAD processing failed.");
                if (sport->mad_agent) {
                        ib_unregister_mad_agent(sport->mad_agent);
                        sport->mad_agent = NULL;
                }
        }
}

/*
 * Allocate and initialize an SRPT I/O context structure.
 */
static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev)
{
        struct srpt_ioctx *ioctx;

        ioctx = kmalloc(sizeof *ioctx, GFP_KERNEL);
        if (!ioctx)
                goto out;

        ioctx->buf = kzalloc(MAX_MESSAGE_SIZE, GFP_KERNEL);
        if (!ioctx->buf)
                goto out_free_ioctx;

        ioctx->dma = ib_dma_map_single(sdev->device, ioctx->buf,
                                       MAX_MESSAGE_SIZE, DMA_BIDIRECTIONAL);
        if (ib_dma_mapping_error(sdev->device, ioctx->dma))
                goto out_free_buf;

        return ioctx;

out_free_buf:
        kfree(ioctx->buf);
out_free_ioctx:
        kfree(ioctx);
out:
        return NULL;
}

/*
 * Deallocate an SRPT I/O context structure.
 */
static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
{
        if (!ioctx)
                return;

        ib_dma_unmap_single(sdev->device, ioctx->dma,
                            MAX_MESSAGE_SIZE, DMA_BIDIRECTIONAL);
        kfree(ioctx->buf);
        kfree(ioctx);
}

/*
 * Associate a ring of SRPT I/O context structures with the specified device.
 */
static int srpt_alloc_ioctx_ring(struct srpt_device *sdev)
{
        int i;

        TRACE_ENTRY();

        for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
                sdev->ioctx_ring[i] = srpt_alloc_ioctx(sdev);

                if (!sdev->ioctx_ring[i])
                        goto err;

                sdev->ioctx_ring[i]->index = i;
        }

        TRACE_EXIT_RES(0);

        return 0;

err:
        while (--i >= 0) {
                srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
                sdev->ioctx_ring[i] = NULL;
        }
        TRACE_EXIT_RES(-ENOMEM);
        return -ENOMEM;
}

/* Free the ring of SRPT I/O context structures. */
static void srpt_free_ioctx_ring(struct srpt_device *sdev)
{
        int i;

        for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
                srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
                sdev->ioctx_ring[i] = NULL;
        }
}

/** Atomically get the state of a command. */
static enum srpt_command_state srpt_get_cmd_state(struct srpt_ioctx *ioctx)
{
        barrier();
        return atomic_read(&ioctx->state);
}

/**
 * Atomically set the state of a command.
 * @new: New state to be set.
 *
 * Does not modify the state of aborted commands.
 *
 * Returns the previous command state.
 */
static enum srpt_command_state srpt_set_cmd_state(struct srpt_ioctx *ioctx,
                                                  enum srpt_command_state new)
{
        enum srpt_command_state previous;

        WARN_ON(new == SRPT_STATE_NEW);

        do {
                barrier();
                previous = atomic_read(&ioctx->state);
        } while (previous != SRPT_STATE_ABORTED
                 && atomic_cmpxchg(&ioctx->state, previous, new) != previous);
        barrier();

        return previous;
}

/**
 * Atomically test and set the state of a command.
 * @expected: State to compare against.
 * @new:      New state to be set if the current state matches 'expected'.
 *
 * Returns the previous command state.
 */
static enum srpt_command_state
srpt_test_and_set_cmd_state(struct srpt_ioctx *ioctx,
                            enum srpt_command_state expected,
                            enum srpt_command_state new)
{
        enum srpt_command_state previous;

        WARN_ON(expected == SRPT_STATE_ABORTED);
        WARN_ON(new == SRPT_STATE_NEW);

        do {
                barrier();
                previous = atomic_read(&ioctx->state);
        } while (previous != SRPT_STATE_ABORTED
                 && previous == expected
                 && atomic_cmpxchg(&ioctx->state, previous, new) != previous);
        barrier();

        return previous;
}

/*
 * Post a receive request on the work queue of InfiniBand device 'sdev'.
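 * The work request ID is tagged with the SRPT_OP_RECV bit such that the
 * completion handler can tell receive completions apart from send
 * completions.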
 */
static int srpt_post_recv(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
{
        struct ib_sge list;
        struct ib_recv_wr wr, *bad_wr;

        wr.wr_id = ioctx->index | SRPT_OP_RECV;

        list.addr = ioctx->dma;
        list.length = MAX_MESSAGE_SIZE;
        list.lkey = sdev->mr->lkey;

        wr.next = NULL;
        wr.sg_list = &list;
        wr.num_sge = 1;

        return ib_post_srq_recv(sdev->srq, &wr, &bad_wr);
}

/*
 * Post an IB send request.
 * @ch: RDMA channel to post the send request on.
 * @ioctx: I/O context of the send request.
 * @len: length of the request to be sent in bytes.
 *
 * Returns zero upon success and a non-zero value upon failure.
 */
static int srpt_post_send(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
                          int len)
{
        struct ib_sge list;
        struct ib_send_wr wr, *bad_wr;
        struct srpt_device *sdev = ch->sport->sdev;

        ib_dma_sync_single_for_device(sdev->device, ioctx->dma,
                                      MAX_MESSAGE_SIZE, DMA_TO_DEVICE);

        list.addr = ioctx->dma;
        list.length = len;
        list.lkey = sdev->mr->lkey;

        wr.next = NULL;
        wr.wr_id = ioctx->index;
        wr.sg_list = &list;
        wr.num_sge = 1;
        wr.opcode = IB_WR_SEND;
        wr.send_flags = IB_SEND_SIGNALED;

        return ib_post_send(ch->qp, &wr, &bad_wr);
}

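/*
 * Extract the data buffer descriptors from SRP_CMD *srp_cmd and store these
 * in *ioctx. A direct descriptor is copied into ioctx->single_rbuf; for an
 * indirect descriptor the descriptor list is copied into an array that is
 * allocated dynamically when it holds more than one element. *ind is set to
 * one if the descriptor list is only partially present in the SRP_CMD
 * information unit.
 */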
static int srpt_get_desc_tbl(struct srpt_ioctx *ioctx, struct srp_cmd *srp_cmd,
                             int *ind)
{
        struct srp_indirect_buf *idb;
        struct srp_direct_buf *db;

        *ind = 0;
        if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) ||
            ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) {
                ioctx->n_rbuf = 1;
                ioctx->rbufs = &ioctx->single_rbuf;

                db = (void *)srp_cmd->add_data;
                memcpy(ioctx->rbufs, db, sizeof *db);
                ioctx->data_len = be32_to_cpu(db->len);
        } else {
                idb = (void *)srp_cmd->add_data;

                ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof *db;

                if (ioctx->n_rbuf >
                    (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
                        *ind = 1;
                        ioctx->n_rbuf = 0;
                        goto out;
                }

                if (ioctx->n_rbuf == 1)
                        ioctx->rbufs = &ioctx->single_rbuf;
                else
                        ioctx->rbufs =
                                kmalloc(ioctx->n_rbuf * sizeof *db, GFP_ATOMIC);
                if (!ioctx->rbufs) {
                        ioctx->n_rbuf = 0;
                        return -ENOMEM;
                }

                db = idb->desc_list;
                memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof *db);
                ioctx->data_len = be32_to_cpu(idb->len);
        }
out:
        return 0;
}

/*
 * Modify the attributes of queue pair 'qp': allow local write, remote read,
 * and remote write. Also transition 'qp' to state IB_QPS_INIT.
 */
static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
{
        struct ib_qp_attr *attr;
        int ret;

        attr = kzalloc(sizeof *attr, GFP_KERNEL);
        if (!attr)
                return -ENOMEM;

        attr->qp_state = IB_QPS_INIT;
        attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
            IB_ACCESS_REMOTE_WRITE;
        attr->port_num = ch->sport->port;
        attr->pkey_index = 0;

        ret = ib_modify_qp(qp, attr,
                           IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PORT |
                           IB_QP_PKEY_INDEX);

        kfree(attr);
        return ret;
}

/**
 * Change the state of a channel to 'ready to receive' (RTR).
 * @ch: channel of the queue pair.
 * @qp: queue pair to change the state of.
 *
 * Returns zero upon success and a negative value upon failure.
 *
 * Note: currently a struct ib_qp_attr takes 136 bytes on a 64-bit system.
 * If this structure ever becomes larger, it might be necessary to allocate
 * it dynamically instead of on the stack.
 */
static int srpt_ch_qp_rtr(struct srpt_rdma_ch *ch, struct ib_qp *qp)
{
        struct ib_qp_attr qp_attr;
        int attr_mask;
        int ret;

        qp_attr.qp_state = IB_QPS_RTR;
        ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask);
        if (ret)
                goto out;

        qp_attr.max_dest_rd_atomic = 4;

        ret = ib_modify_qp(qp, &qp_attr, attr_mask);

out:
        return ret;
}

/**
 * Change the state of a channel to 'ready to send' (RTS).
 * @ch: channel of the queue pair.
 * @qp: queue pair to change the state of.
 *
 * Returns zero upon success and a negative value upon failure.
 *
 * Note: currently a struct ib_qp_attr takes 136 bytes on a 64-bit system.
 * If this structure ever becomes larger, it might be necessary to allocate
 * it dynamically instead of on the stack.
 */
static int srpt_ch_qp_rts(struct srpt_rdma_ch *ch, struct ib_qp *qp)
{
        struct ib_qp_attr qp_attr;
        int attr_mask;
        int ret;

        qp_attr.qp_state = IB_QPS_RTS;
        ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask);
        if (ret)
                goto out;

        qp_attr.max_rd_atomic = 4;

        ret = ib_modify_qp(qp, &qp_attr, attr_mask);

out:
        return ret;
}

static void srpt_reset_ioctx(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
{
        int i;

        if (ioctx->n_rdma_ius > 0 && ioctx->rdma_ius) {
                struct rdma_iu *riu = ioctx->rdma_ius;

                for (i = 0; i < ioctx->n_rdma_ius; ++i, ++riu)
                        kfree(riu->sge);
                kfree(ioctx->rdma_ius);
        }

        if (ioctx->n_rbuf > 1)
                kfree(ioctx->rbufs);

        /* If ch == NULL this means that the command has been aborted. */
        if (!ch)
                return;

        if (srpt_post_recv(ch->sport->sdev, ioctx))
                /* we should queue it back to free_ioctx queue */
                PRINT_ERROR("%s", "SRQ post_recv failed - this is serious.");
        else
                atomic_inc(&ch->req_lim_delta);
}

static void srpt_abort_scst_cmd(struct srpt_device *sdev,
                                struct scst_cmd *scmnd,
                                bool tell_initiator)
{
        struct srpt_ioctx *ioctx;
        scst_data_direction dir;
        struct srpt_rdma_ch *ch;
        enum srpt_command_state previous_state;

        ioctx = scst_cmd_get_tgt_priv(scmnd);
        BUG_ON(!ioctx);
        dir = scst_cmd_get_data_direction(scmnd);
        if (dir != SCST_DATA_NONE && scst_cmd_get_sg(scmnd))
                ib_dma_unmap_sg(sdev->device,
                                scst_cmd_get_sg(scmnd),
                                scst_cmd_get_sg_cnt(scmnd),
                                scst_to_tgt_dma_dir(dir));

        previous_state = srpt_set_cmd_state(ioctx, SRPT_STATE_ABORTED);
        TRACE_DBG("Aborting cmd with state %d and tag %lld",
                  previous_state, scst_cmd_get_tag(scmnd));
        switch (previous_state) {
        case SRPT_STATE_NEW:
                /*
                 * Do not try to abort the SCST command here but wait until
                 * the SCST core has called srpt_rdy_to_xfer() or
                 * srpt_xmit_response(). Since srpt_release_channel() will
                 * finish before srpt_on_free_cmd() is called, set the channel
                 * pointer inside the SCST command to NULL such that
                 * srpt_on_free_cmd() will not dereference a dangling pointer.
                 */
                ch = ioctx->ch;
                ioctx->ch = NULL;
                BUG_ON(!ch);
                spin_lock_irq(&ch->spinlock);
                list_del(&ioctx->scmnd_list);
                ch->active_scmnd_cnt--;
                spin_unlock_irq(&ch->spinlock);
                break;
        case SRPT_STATE_NEED_DATA:
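                /*
                 * The target was still waiting for data from the initiator:
                 * tell the SCST core that receiving the data failed.
                 */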
                WARN_ON(scst_cmd_get_data_direction(ioctx->scmnd)
                        == SCST_DATA_READ);
                scst_rx_data(scmnd,
                             tell_initiator ? SCST_RX_STATUS_ERROR
                             : SCST_RX_STATUS_ERROR_FATAL,
                             SCST_CONTEXT_THREAD);
                break;
        case SRPT_STATE_PROCESSED:
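                /*
                 * The response had already been built: mark delivery of the
                 * response as failed and finish the command.
                 */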
                scst_set_delivery_status(scmnd, SCST_CMD_DELIVERY_FAILED);
                WARN_ON(scmnd->state != SCST_CMD_STATE_XMIT_WAIT);
                scst_tgt_cmd_done(scmnd, scst_estimate_context());
                break;
        default:
                TRACE_DBG("Aborting cmd with state %d", previous_state);
                WARN_ON("ERROR: unexpected command state");
        }
}

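/*
 * Process a work completion with an unsuccessful status: either recycle the
 * I/O context or abort the SCST command associated with it.
 */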
static void srpt_handle_err_comp(struct srpt_rdma_ch *ch, struct ib_wc *wc)
{
        struct srpt_ioctx *ioctx;
        struct srpt_device *sdev = ch->sport->sdev;

        if (wc->wr_id & SRPT_OP_RECV) {
                ioctx = sdev->ioctx_ring[wc->wr_id & ~SRPT_OP_RECV];
                PRINT_ERROR("%s", "This is serious - SRQ is in bad state.");
        } else {
                ioctx = sdev->ioctx_ring[wc->wr_id];

                if (ioctx->scmnd)
                        srpt_abort_scst_cmd(sdev, ioctx->scmnd, true);
                else
                        srpt_reset_ioctx(ch, ioctx);
        }
}

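/*
 * Process a send completion: if the I/O context is associated with a SCST
 * command, unmap the data buffers and tell the SCST core that sending the
 * response has finished; otherwise recycle the I/O context.
 */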
static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
                                  struct srpt_ioctx *ioctx,
                                  enum scst_exec_context context)
{
        if (ioctx->scmnd) {
                scst_data_direction dir =
                        scst_cmd_get_data_direction(ioctx->scmnd);

                if (dir != SCST_DATA_NONE && scst_cmd_get_sg(ioctx->scmnd))
                        ib_dma_unmap_sg(ch->sport->sdev->device,
                                        scst_cmd_get_sg(ioctx->scmnd),
                                        scst_cmd_get_sg_cnt(ioctx->scmnd),
                                        scst_to_tgt_dma_dir(dir));

                WARN_ON(ioctx->scmnd->state != SCST_CMD_STATE_XMIT_WAIT);
                scst_tgt_cmd_done(ioctx->scmnd, context);
        } else
                srpt_reset_ioctx(ch, ioctx);
}

/** Process an RDMA completion notification. */
static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
                                  struct srpt_ioctx *ioctx)
{
        if (!ioctx->scmnd) {
                WARN_ON("ERROR: ioctx->scmnd == NULL");
                srpt_reset_ioctx(ch, ioctx);
                return;
        }

        /*
         * If an RDMA completion notification has been received for a write
         * command, tell SCST that processing can continue by calling
         * scst_rx_data().
         */
        if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
                                SRPT_STATE_DATA_IN) == SRPT_STATE_NEED_DATA) {
                WARN_ON(scst_cmd_get_data_direction(ioctx->scmnd)
                        == SCST_DATA_READ);
                scst_rx_data(ioctx->scmnd, SCST_RX_STATUS_SUCCESS,
                             scst_estimate_context());
        }
}

/**
 * Build an SRP_RSP response.
 * @ch: RDMA channel through which the request has been received.
 * @ioctx: I/O context in which the SRP_RSP response will be built.
 * @s_key: sense key that will be stored in the response.
 * @s_code: value that will be stored in the asc_ascq field of the sense data.
 * @tag: tag of the request for which this response is being generated.
 *
 * Returns the size in bytes of the SRP_RSP response.
 *
 * An SRP_RSP response contains a SCSI status or service response. See also
 * section 6.9 in the T10 SRP r16a document for the format of an SRP_RSP
 * response. See also SPC-2 for more information about sense data.
 */
static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
                              struct srpt_ioctx *ioctx, u8 s_key, u8 s_code,
                              u64 tag)
{
        struct srp_rsp *srp_rsp;
        struct sense_data *sense;
        int limit_delta;
        int sense_data_len = 0;

        srp_rsp = ioctx->buf;
        memset(srp_rsp, 0, sizeof *srp_rsp);

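        /*
         * Report the request limit delta that has accumulated since the
         * previous response was sent and reset the counter.
         */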
        limit_delta = atomic_read(&ch->req_lim_delta);
        atomic_sub(limit_delta, &ch->req_lim_delta);

        srp_rsp->opcode = SRP_RSP;
        srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
        srp_rsp->tag = tag;

        if (s_key != NO_SENSE) {
                sense_data_len = sizeof *sense + (sizeof *sense % 4);
                srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
                srp_rsp->status = SAM_STAT_CHECK_CONDITION;
                srp_rsp->sense_data_len = cpu_to_be32(sense_data_len);

                sense = (struct sense_data *)(srp_rsp + 1);
                sense->err_code = 0x70;
                sense->key = s_key;
                sense->asc_ascq = s_code;
        }

        return sizeof(*srp_rsp) + sense_data_len;
}

/**
 * Build a task management response, which is a specific SRP_RSP response.
 * @ch: RDMA channel through which the request has been received.
 * @ioctx: I/O context in which the SRP_RSP response will be built.
 * @rsp_code: RSP_CODE that will be stored in the response.
 * @tag: tag of the request for which this response is being generated.
 *
 * Returns the size in bytes of the SRP_RSP response.
 *
 * An SRP_RSP response contains a SCSI status or service response. See also
 * section 6.9 in the T10 SRP r16a document for the format of an SRP_RSP
 * response.
 */
static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
                                  struct srpt_ioctx *ioctx, u8 rsp_code,
                                  u64 tag)
{
        struct srp_rsp *srp_rsp;
        int limit_delta;
        int resp_data_len = 0;

        ib_dma_sync_single_for_cpu(ch->sport->sdev->device, ioctx->dma,
                                   MAX_MESSAGE_SIZE, DMA_TO_DEVICE);

        srp_rsp = ioctx->buf;
        memset(srp_rsp, 0, sizeof *srp_rsp);

        limit_delta = atomic_read(&ch->req_lim_delta);
        atomic_sub(limit_delta, &ch->req_lim_delta);

        srp_rsp->opcode = SRP_RSP;
        srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
        srp_rsp->tag = tag;

        if (rsp_code != SRP_TSK_MGMT_SUCCESS) {
                resp_data_len = 4;
                srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID;
                srp_rsp->resp_data_len = cpu_to_be32(resp_data_len);
                srp_rsp->data[3] = rsp_code;
        }

        return sizeof(*srp_rsp) + resp_data_len;
}

/*
 * Process SRP_CMD.
 */
static int srpt_handle_cmd(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
{
        struct scst_cmd *scmnd;
        struct srp_cmd *srp_cmd;
        struct srp_rsp *srp_rsp;
        scst_data_direction dir;
        int indirect_desc = 0;
        int ret;
        unsigned long flags;

        srp_cmd = ioctx->buf;
        srp_rsp = ioctx->buf;

        dir = SCST_DATA_NONE;
        if (srp_cmd->buf_fmt) {
                ret = srpt_get_desc_tbl(ioctx, srp_cmd, &indirect_desc);
                if (ret) {
                        srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
                                           NO_ADD_SENSE, srp_cmd->tag);
                        srp_rsp->status = SAM_STAT_TASK_SET_FULL;
                        goto err;
                }

                if (indirect_desc) {
                        srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
                                           NO_ADD_SENSE, srp_cmd->tag);
                        srp_rsp->status = SAM_STAT_TASK_SET_FULL;
                        goto err;
                }

                /*
                 * The lower four bits of the buffer format field contain the
                 * DATA-IN buffer descriptor format, and the highest four bits
                 * contain the DATA-OUT buffer descriptor format.
                 */
                if (srp_cmd->buf_fmt & 0xf)
                        /* DATA-IN: transfer data from target to initiator. */
                        dir = SCST_DATA_READ;
                else if (srp_cmd->buf_fmt >> 4)
                        /* DATA-OUT: transfer data from initiator to target. */
                        dir = SCST_DATA_WRITE;
        }

        scmnd = scst_rx_cmd(ch->scst_sess, (u8 *) &srp_cmd->lun,
                            sizeof srp_cmd->lun, srp_cmd->cdb, 16,
                            thread ? SCST_NON_ATOMIC : SCST_ATOMIC);
        if (!scmnd) {
                srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
                                   NO_ADD_SENSE, srp_cmd->tag);
                srp_rsp->status = SAM_STAT_TASK_SET_FULL;
                goto err;
        }

        ioctx->scmnd = scmnd;

        switch (srp_cmd->task_attr) {
        case SRP_CMD_HEAD_OF_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_HEAD_OF_QUEUE;
                break;
        case SRP_CMD_ORDERED_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
                break;
        case SRP_CMD_SIMPLE_Q:
                scmnd->queue_type = SCST_CMD_QUEUE_SIMPLE;
                break;
        case SRP_CMD_ACA:
                scmnd->queue_type = SCST_CMD_QUEUE_ACA;
                break;
        default:
                scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
                break;
        }

        scst_cmd_set_tag(scmnd, srp_cmd->tag);
        scst_cmd_set_tgt_priv(scmnd, ioctx);
        scst_cmd_set_expected(scmnd, dir, ioctx->data_len);

        spin_lock_irqsave(&ch->spinlock, flags);
        list_add_tail(&ioctx->scmnd_list, &ch->active_scmnd_list);
        ch->active_scmnd_cnt++;
        spin_unlock_irqrestore(&ch->spinlock, flags);

        scst_cmd_init_done(scmnd, scst_estimate_context());

        return 0;

err:
        WARN_ON(srp_rsp->opcode != SRP_RSP);

        return -1;
}

/*
 * Process an SRP_TSK_MGMT request.
 *
 * Returns 0 upon success and -1 upon failure.
 *
 * Each task management function is performed by calling one of the
 * scst_rx_mgmt_fn*() functions. These functions will either report failure
 * or process the task management function asynchronously. The function
 * srpt_tsk_mgmt_done() will be called by the SCST core upon completion of the
 * task management function. When srpt_handle_tsk_mgmt() reports failure
 * (i.e. returns -1) a response will have been built in ioctx->buf. This
 * information unit has to be sent back by the caller.
 *
 * For more information about SRP_TSK_MGMT information units, see also section
 * 6.7 in the T10 SRP r16a document.
 */
static int srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
                                struct srpt_ioctx *ioctx)
{
        struct srp_tsk_mgmt *srp_tsk;
        struct srpt_mgmt_ioctx *mgmt_ioctx;
        int ret;

        srp_tsk = ioctx->buf;

        TRACE_DBG("recv_tsk_mgmt= %d for task_tag= %lld"
                  " using tag= %lld cm_id= %p sess= %p",
                  srp_tsk->tsk_mgmt_func,
                  (unsigned long long) srp_tsk->task_tag,
                  (unsigned long long) srp_tsk->tag,
                  ch->cm_id, ch->scst_sess);

        mgmt_ioctx = kmalloc(sizeof *mgmt_ioctx, GFP_ATOMIC);
        if (!mgmt_ioctx) {
                srpt_build_tskmgmt_rsp(ch, ioctx, SRP_TSK_MGMT_FAILED,
                                       srp_tsk->tag);
                goto err;
        }

        mgmt_ioctx->ioctx = ioctx;
        mgmt_ioctx->ch = ch;
        mgmt_ioctx->tag = srp_tsk->tag;

        switch (srp_tsk->tsk_mgmt_func) {
        case SRP_TSK_ABORT_TASK:
                TRACE_DBG("%s", "Processing SRP_TSK_ABORT_TASK");
                ret = scst_rx_mgmt_fn_tag(ch->scst_sess,
                                          SCST_ABORT_TASK,
                                          srp_tsk->task_tag,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_ABORT_TASK_SET:
                TRACE_DBG("%s", "Processing SRP_TSK_ABORT_TASK_SET");
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_ABORT_TASK_SET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_CLEAR_TASK_SET:
                TRACE_DBG("%s", "Processing SRP_TSK_CLEAR_TASK_SET");
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_CLEAR_TASK_SET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_LUN_RESET:
                TRACE_DBG("%s", "Processing SRP_TSK_LUN_RESET");
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_LUN_RESET,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        case SRP_TSK_CLEAR_ACA:
                TRACE_DBG("%s", "Processing SRP_TSK_CLEAR_ACA");
                ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
                                          SCST_CLEAR_ACA,
                                          (u8 *) &srp_tsk->lun,
                                          sizeof srp_tsk->lun,
                                          thread ?
                                          SCST_NON_ATOMIC : SCST_ATOMIC,
                                          mgmt_ioctx);
                break;
        default:
                TRACE_DBG("%s", "Unsupported task management function.");
                srpt_build_tskmgmt_rsp(ch, ioctx,
                                       SRP_TSK_MGMT_FUNC_NOT_SUPP,
                                       srp_tsk->tag);
                goto err;
        }

        if (ret) {
                TRACE_DBG("%s", "Processing task management function failed.");
                srpt_build_tskmgmt_rsp(ch, ioctx, SRP_TSK_MGMT_FAILED,
                                       srp_tsk->tag);
                goto err;
        }

        WARN_ON(srp_tsk->opcode == SRP_RSP);

        return 0;

err:
        WARN_ON(srp_tsk->opcode != SRP_RSP);

        kfree(mgmt_ioctx);
        return -1;
}

/**
 * Process a receive completion event.
 * @ch: RDMA channel for which the completion event has been received.
 * @ioctx: SRPT I/O context for which the completion event has been received.
 */
static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
                               struct srpt_ioctx *ioctx)
{
        struct srp_cmd *srp_cmd;
        struct srp_rsp *srp_rsp;
        unsigned long flags;
        int len;

        spin_lock_irqsave(&ch->spinlock, flags);
        if (ch->state != RDMA_CHANNEL_LIVE) {
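                /*
                 * Information units received while the channel is still
                 * connecting are queued on cmd_wait_list so that they can be
                 * processed once the channel reaches the live state.
                 */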
1417                 if (ch->state == RDMA_CHANNEL_CONNECTING) {
1418                         list_add_tail(&ioctx->wait_list, &ch->cmd_wait_list);
1419                         spin_unlock_irqrestore(&ch->spinlock, flags);
1420                         return;
1421                 } else {
1422                         spin_unlock_irqrestore(&ch->spinlock, flags);
1423                         srpt_reset_ioctx(ch, ioctx);
1424                         return;
1425                 }
1426         }
1427         spin_unlock_irqrestore(&ch->spinlock, flags);
1428
1429         ib_dma_sync_single_for_cpu(ch->sport->sdev->device, ioctx->dma,
1430                                    MAX_MESSAGE_SIZE, DMA_FROM_DEVICE);
1431
1432         ioctx->data_len = 0;
1433         ioctx->n_rbuf = 0;
1434         ioctx->rbufs = NULL;
1435         ioctx->n_rdma = 0;
1436         ioctx->n_rdma_ius = 0;
1437         ioctx->rdma_ius = NULL;
1438         ioctx->scmnd = NULL;
1439         ioctx->ch = ch;
1440         atomic_set(&ioctx->state, SRPT_STATE_NEW);
1441
1442         srp_cmd = ioctx->buf;
1443         srp_rsp = ioctx->buf;
1444
1445         switch (srp_cmd->opcode) {
1446         case SRP_CMD:
1447                 if (srpt_handle_cmd(ch, ioctx) < 0)
1448                         goto err;
1449                 break;
1450
1451         case SRP_TSK_MGMT:
1452                 if (srpt_handle_tsk_mgmt(ch, ioctx) < 0)
1453                         goto err;
1454                 break;
1455
1456         case SRP_I_LOGOUT:
1457         case SRP_AER_REQ:
1458         default:
1459                 srpt_build_cmd_rsp(ch, ioctx, ILLEGAL_REQUEST, INVALID_CDB,
1460                                    srp_cmd->tag);
1461                 goto err;
1462         }
1463
1464         ib_dma_sync_single_for_device(ch->sport->sdev->device,
1465                                    ioctx->dma, MAX_MESSAGE_SIZE,
1466                                    DMA_FROM_DEVICE);
1467
1468         return;
1469
1470 err:
1471         WARN_ON(srp_rsp->opcode != SRP_RSP);
1472         len = (sizeof *srp_rsp) + be32_to_cpu(srp_rsp->sense_data_len);
1473
1474         if (ch->state != RDMA_CHANNEL_LIVE) {
1475                 /* Give up if another thread modified the channel state. */
1476                 PRINT_ERROR("%s: channel is in state %d", __func__, ch->state);
1477                 srpt_reset_ioctx(ch, ioctx);
1478         } else if (srpt_post_send(ch, ioctx, len)) {
1479                 PRINT_ERROR("%s: sending SRP_RSP response failed", __func__);
1480                 srpt_reset_ioctx(ch, ioctx);
1481         }
1482 }
1483
1484 /*
1485  * Returns true if the ioctx list is non-empty or if the ib_srpt kernel thread
1486  * should stop.
1487  * @pre thread != 0
1488  */
1489 static inline int srpt_test_ioctx_list(void)
1490 {
1491         int res = (!list_empty(&srpt_thread.thread_ioctx_list) ||
1492                    unlikely(kthread_should_stop()));
1493         return res;
1494 }
1495
1496 /*
1497  * Add 'ioctx' to the tail of the ioctx list and wake up the kernel thread.
1498  *
1499  * @pre thread != 0
1500  */
1501 static inline void srpt_schedule_thread(struct srpt_ioctx *ioctx)
1502 {
1503         unsigned long flags;
1504
1505         spin_lock_irqsave(&srpt_thread.thread_lock, flags);
1506         list_add_tail(&ioctx->comp_list, &srpt_thread.thread_ioctx_list);
1507         spin_unlock_irqrestore(&srpt_thread.thread_lock, flags);
1508         wake_up(&ioctx_list_waitQ);
1509 }
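/*
 * Added commentary: srpt_schedule_thread() and srpt_ioctx_thread() form a
 * producer/consumer pair. The producer may run in IRQ context, so it only
 * takes thread_lock, queues the ioctx and calls wake_up(); the consumer
 * re-checks srpt_test_ioctx_list() under the same lock before sleeping,
 * which is what prevents a wake-up from being lost between the check and
 * the schedule() call.
 */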
1510
1511 /**
1512  * InfiniBand completion queue callback function.
1513  * @cq: completion queue.
1514  * @ctx: completion queue context, which was passed as the fourth argument of
1515  *       the function ib_create_cq().
1516  */
1517 static void srpt_completion(struct ib_cq *cq, void *ctx)
1518 {
1519         struct srpt_rdma_ch *ch = ctx;
1520         struct srpt_device *sdev = ch->sport->sdev;
1521         struct ib_wc wc;
1522         struct srpt_ioctx *ioctx;
1523
1524         ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
1525         while (ib_poll_cq(ch->cq, 1, &wc) > 0) {
1526                 if (wc.status) {
1527                         PRINT_ERROR("failed %s status= %d",
1528                                wc.wr_id & SRPT_OP_RECV ? "receive" : "send",
1529                                wc.status);
1530                         srpt_handle_err_comp(ch, &wc);
1531                         break;
1532                 }
1533
1534                 if (wc.wr_id & SRPT_OP_RECV) {
1535                         ioctx = sdev->ioctx_ring[wc.wr_id & ~SRPT_OP_RECV];
1536                         if (thread) {
1537                                 ioctx->ch = ch;
1538                                 ioctx->op = IB_WC_RECV;
1539                                 srpt_schedule_thread(ioctx);
1540                         } else
1541                                 srpt_handle_new_iu(ch, ioctx);
1542                         continue;
1543                 } else
1544                         ioctx = sdev->ioctx_ring[wc.wr_id];
1545
1546                 if (thread) {
1547                         ioctx->ch = ch;
1548                         ioctx->op = wc.opcode;
1549                         srpt_schedule_thread(ioctx);
1550                 } else {
1551                         switch (wc.opcode) {
1552                         case IB_WC_SEND:
1553                                 srpt_handle_send_comp(ch, ioctx,
1554                                         scst_estimate_context());
1555                                 break;
1556                         case IB_WC_RDMA_WRITE:
1557                         case IB_WC_RDMA_READ:
1558                                 srpt_handle_rdma_comp(ch, ioctx);
1559                                 break;
1560                         default:
1561                                 break;
1562                         }
1563                 }
1564
1565 #if defined(CONFIG_SCST_DEBUG)
1566                 if (interrupt_processing_delay_in_us <= MAX_UDELAY_MS * 1000)
1567                         udelay(interrupt_processing_delay_in_us);
1568 #endif
1569         }
1570 }
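/*
 * Added commentary, a sketch of how the completion demultiplexing above is
 * presumably set up when the work requests are posted (the posting code
 * lives elsewhere in this file):
 *
 *      recv_wr.wr_id = ioctx->index | SRPT_OP_RECV;    // receive ring slot
 *      send_wr.wr_id = ioctx->index;                   // send, no tag bit
 *
 * Tagging the 64-bit wr_id this way lets a single CQ poll loop recover both
 * the operation type (wc.wr_id & SRPT_OP_RECV) and the ioctx ring index
 * (wc.wr_id & ~SRPT_OP_RECV).
 */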
1571
1572 /*
1573  * Create a completion queue on the specified device.
1574  */
1575 static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
1576 {
1577         struct ib_qp_init_attr *qp_init;
1578         struct srpt_device *sdev = ch->sport->sdev;
1579         int cqe;
1580         int ret;
1581
1582         qp_init = kzalloc(sizeof *qp_init, GFP_KERNEL);
1583         if (!qp_init)
1584                 return -ENOMEM;
1585
1586         /* Create a completion queue (CQ). */
1587
1588         cqe = SRPT_RQ_SIZE + SRPT_SQ_SIZE - 1;
1589 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(RHEL_RELEASE_CODE)
1590         ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe);
1591 #else
1592         ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe, 0);
1593 #endif
1594         if (IS_ERR(ch->cq)) {
1595                 ret = PTR_ERR(ch->cq);
1596                 PRINT_ERROR("failed to create_cq cqe= %d ret= %d", cqe, ret);
1597                 goto out;
1598         }
1599
1600         /* Request completion notification. */
1601
1602         ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
1603
1604         /* Create a queue pair (QP). */
1605
1606         qp_init->qp_context = (void *)ch;
1607         qp_init->event_handler = srpt_qp_event;
1608         qp_init->send_cq = ch->cq;
1609         qp_init->recv_cq = ch->cq;
1610         qp_init->srq = sdev->srq;
1611         qp_init->sq_sig_type = IB_SIGNAL_REQ_WR;
1612         qp_init->qp_type = IB_QPT_RC;
1613         qp_init->cap.max_send_wr = SRPT_SQ_SIZE;
1614         qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE;
1615
1616         ch->qp = ib_create_qp(sdev->pd, qp_init);
1617         if (IS_ERR(ch->qp)) {
1618                 ret = PTR_ERR(ch->qp);
1619                 ib_destroy_cq(ch->cq);
1620                 PRINT_ERROR("failed to create_qp ret= %d", ret);
1621                 goto out;
1622         }
1623
1624         TRACE_DBG("%s: max_cqe= %d max_sge= %d cm_id= %p",
1625                __func__, ch->cq->cqe, qp_init->cap.max_send_sge,
1626                ch->cm_id);
1627
1628         /* Modify the attributes and the state of queue pair ch->qp. */
1629
1630         ret = srpt_init_ch_qp(ch, ch->qp);
1631         if (ret) {
1632                 ib_destroy_qp(ch->qp);
1633                 ib_destroy_cq(ch->cq);
1634                 goto out;
1635         }
1636
1637         atomic_set(&ch->req_lim_delta, SRPT_RQ_SIZE);
1638 out:
1639         kfree(qp_init);
1640         return ret;
1641 }
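/*
 * Design note (added commentary): each channel uses a single CQ for both its
 * send and receive completions, which is why the CQ is sized for
 * SRPT_RQ_SIZE + SRPT_SQ_SIZE - 1 entries. All channels attach to the
 * per-device shared receive queue (qp_init->srq = sdev->srq), so receive
 * buffers are posted once per device in srpt_add_one() instead of once per
 * channel.
 */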
1642
1643 /**
1644  * Look up the RDMA channel that corresponds to the specified cm_id.
1645  *
1646  * Return NULL if no matching RDMA channel has been found.
1647  */
1648 static struct srpt_rdma_ch *srpt_find_channel(struct ib_cm_id *cm_id, bool del)
1649 {
1650         struct srpt_device *sdev = cm_id->context;
1651         struct srpt_rdma_ch *ch;
1652
1653         spin_lock_irq(&sdev->spinlock);
1654         list_for_each_entry(ch, &sdev->rch_list, list) {
1655                 if (ch->cm_id == cm_id) {
1656                         if (del)
1657                                 list_del(&ch->list);
1658                         spin_unlock_irq(&sdev->spinlock);
1659                         return ch;
1660                 }
1661         }
1662
1663         spin_unlock_irq(&sdev->spinlock);
1664
1665         return NULL;
1666 }
1667
1668 /**
1669  * Release all resources associated with the specified RDMA channel.
1670  *
1671  * Note: the caller must have removed the channel from the channel list
1672  * before calling this function.
1673  */
1674 static void srpt_release_channel(struct srpt_rdma_ch *ch, int destroy_cmid)
1675 {
1676         TRACE_ENTRY();
1677
1678         WARN_ON(srpt_find_channel(ch->cm_id, false) == ch);
1679
1680         if (ch->cm_id && destroy_cmid) {
1681                 TRACE_DBG("%s: destroy cm_id= %p", __func__, ch->cm_id);
1682                 ib_destroy_cm_id(ch->cm_id);
1683                 ch->cm_id = NULL;
1684         }
1685
1686         ib_destroy_qp(ch->qp);
1687         ib_destroy_cq(ch->cq);
1688
1689         if (ch->scst_sess) {
1690                 struct srpt_ioctx *ioctx, *ioctx_tmp;
1691
1692                 if (ch->active_scmnd_cnt)
1693                         PRINT_INFO("Releasing session %s which still has %d"
1694                                    " active commands",
1695                                    ch->sess_name, ch->active_scmnd_cnt);
1696                 else
1697                         PRINT_INFO("Releasing session %s", ch->sess_name);
1698
1699                 spin_lock_irq(&ch->spinlock);
1700                 list_for_each_entry_safe(ioctx, ioctx_tmp,
1701                                          &ch->active_scmnd_list, scmnd_list) {
1702                         spin_unlock_irq(&ch->spinlock);
1703
1704                         if (ioctx->scmnd)
1705                                 srpt_abort_scst_cmd(ch->sport->sdev,
1706                                                     ioctx->scmnd, true);
1707
1708                         spin_lock_irq(&ch->spinlock);
1709                 }
1710                 WARN_ON(!list_empty(&ch->active_scmnd_list));
1711                 WARN_ON(ch->active_scmnd_cnt != 0);
1712                 spin_unlock_irq(&ch->spinlock);
1713
1714                 scst_unregister_session(ch->scst_sess, 0, NULL);
1715                 ch->scst_sess = NULL;
1716         }
1717
1718         kfree(ch);
1719
1720         TRACE_EXIT();
1721 }
1722
1723 static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
1724                             struct ib_cm_req_event_param *param,
1725                             void *private_data)
1726 {
1727         struct srpt_device *sdev = cm_id->context;
1728         struct srp_login_req *req;
1729         struct srp_login_rsp *rsp;
1730         struct srp_login_rej *rej;
1731         struct ib_cm_rep_param *rep_param;
1732         struct srpt_rdma_ch *ch, *tmp_ch;
1733         u32 it_iu_len;
1734         int ret = 0;
1735
1736 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
1737         WARN_ON(!sdev || !private_data);
1738         if (!sdev || !private_data)
1739                 return -EINVAL;
1740 #else
1741         if (WARN_ON(!sdev || !private_data))
1742                 return -EINVAL;
1743 #endif
1744
1745         req = (struct srp_login_req *)private_data;
1746
1747         it_iu_len = be32_to_cpu(req->req_it_iu_len);
1748
1749         PRINT_INFO("Received SRP_LOGIN_REQ with"
1750             " i_port_id 0x%llx:0x%llx, t_port_id 0x%llx:0x%llx and length %d"
1751             " on port %d (guid=0x%llx:0x%llx)",
1752             (unsigned long long)be64_to_cpu(*(u64 *)&req->initiator_port_id[0]),
1753             (unsigned long long)be64_to_cpu(*(u64 *)&req->initiator_port_id[8]),
1754             (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[0]),
1755             (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[8]),
1756             it_iu_len,
1757             param->port,
1758             (unsigned long long)be64_to_cpu(*(u64 *)
1759                                 &sdev->port[param->port - 1].gid.raw[0]),
1760             (unsigned long long)be64_to_cpu(*(u64 *)
1761                                 &sdev->port[param->port - 1].gid.raw[8]));
1762
1763         rsp = kzalloc(sizeof *rsp, GFP_KERNEL);
1764         rej = kzalloc(sizeof *rej, GFP_KERNEL);
1765         rep_param = kzalloc(sizeof *rep_param, GFP_KERNEL);
1766
1767         if (!rsp || !rej || !rep_param) {
1768                 ret = -ENOMEM;
1769                 goto out;
1770         }
1771
1772         if (it_iu_len > MAX_MESSAGE_SIZE || it_iu_len < 64) {
1773                 rej->reason =
1774                     cpu_to_be32(SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
1775                 ret = -EINVAL;
1776                 PRINT_ERROR("rejected SRP_LOGIN_REQ because its"
1777                             " length (%d bytes) is invalid", it_iu_len);
1778                 goto reject;
1779         }
1780
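        /*
         * Added commentary: bits 1:0 of req_flags carry the SRP MULTICHANNEL
         * ACTION field. SRP_MULTICHAN_SINGLE asks the target to terminate any
         * existing channel for the same initiator/target port pair (handled
         * below); SRP_MULTICHAN_MULTI asks for existing channels to be kept.
         */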
1781         if ((req->req_flags & 0x3) == SRP_MULTICHAN_SINGLE) {
1782                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;
1783
1784                 spin_lock_irq(&sdev->spinlock);
1785
1786                 list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
1787                         if (!memcmp(ch->i_port_id, req->initiator_port_id, 16)
1788                             && !memcmp(ch->t_port_id, req->target_port_id, 16)
1789                             && param->port == ch->sport->port
1790                             && param->listen_id == ch->sport->sdev->cm_id
1791                             && ch->cm_id) {
1792                                 enum rdma_ch_state prev_state;
1793
1794                                 /* found an existing channel */
1795                                 TRACE_DBG("Found existing channel name= %s"
1796                                           " cm_id= %p state= %d",
1797                                           ch->sess_name, ch->cm_id, ch->state);
1798
1799                                 prev_state = ch->state;
1800                                 if (ch->state == RDMA_CHANNEL_LIVE)
1801                                         ch->state = RDMA_CHANNEL_DISCONNECTING;
1802                                 else if (ch->state == RDMA_CHANNEL_CONNECTING)
1803                                         list_del(&ch->list);
1804
1805                                 spin_unlock_irq(&sdev->spinlock);
1806
1807                                 rsp->rsp_flags =
1808                                         SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
1809
1810                                 if (prev_state == RDMA_CHANNEL_LIVE) {
1811                                         ib_send_cm_dreq(ch->cm_id, NULL, 0);
1812                                         PRINT_INFO("disconnected"
1813                                           " session %s because a new"
1814                                           " SRP_LOGIN_REQ has been received.",
1815                                           ch->sess_name);
1816                                 } else if (prev_state ==
1817                                          RDMA_CHANNEL_CONNECTING) {
1818                                         PRINT_ERROR("%s", "rejected"
1819                                           " SRP_LOGIN_REQ because another login"
1820                                           " request is being processed.");
1821                                         ib_send_cm_rej(ch->cm_id,
1822                                                        IB_CM_REJ_NO_RESOURCES,
1823                                                        NULL, 0, NULL, 0);
1824                                         srpt_release_channel(ch, 1);
1825                                 }
1826
1827                                 spin_lock_irq(&sdev->spinlock);
1828                         }
1829                 }
1830
1831                 spin_unlock_irq(&sdev->spinlock);
1832
1833         } else
1834                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
1835
1836         if (((u64) (*(u64 *) req->target_port_id) !=
1837              cpu_to_be64(srpt_service_guid)) ||
1838             ((u64) (*(u64 *) (req->target_port_id + 8)) !=
1839              cpu_to_be64(srpt_service_guid))) {
1840                 rej->reason =
1841                     cpu_to_be32(SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
1842                 ret = -ENOMEM;
1843                 PRINT_ERROR("%s", "rejected SRP_LOGIN_REQ because it"
1844                        " has an invalid target port identifier.");
1845                 goto reject;
1846         }
1847
1848         ch = kzalloc(sizeof *ch, GFP_KERNEL);
1849         if (!ch) {
1850                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1851                 PRINT_ERROR("%s",
1852                             "rejected SRP_LOGIN_REQ because out of memory.");
1853                 ret = -ENOMEM;
1854                 goto reject;
1855         }
1856
1857         spin_lock_init(&ch->spinlock);
1858         memcpy(ch->i_port_id, req->initiator_port_id, 16);
1859         memcpy(ch->t_port_id, req->target_port_id, 16);
1860         ch->sport = &sdev->port[param->port - 1];
1861         ch->cm_id = cm_id;
1862         ch->state = RDMA_CHANNEL_CONNECTING;
1863         INIT_LIST_HEAD(&ch->cmd_wait_list);
1864         INIT_LIST_HEAD(&ch->active_scmnd_list);
1865
1866         ret = srpt_create_ch_ib(ch);
1867         if (ret) {
1868                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1869                 PRINT_ERROR("%s", "rejected SRP_LOGIN_REQ because creating"
1870                             " a new RDMA channel failed.");
1871                 goto free_ch;
1872         }
1873
1874         ret = srpt_ch_qp_rtr(ch, ch->qp);
1875         if (ret) {
1876                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1877                 PRINT_ERROR("rejected SRP_LOGIN_REQ because enabling"
1878                        " RTR failed (error code = %d)", ret);
1879                 goto destroy_ib;
1880         }
1881
1882         if (use_port_guid_in_session_name) {
1883                 /*
1884                  * If the kernel module parameter use_port_guid_in_session_name
1885                  * has been specified, use a combination of the target port
1886                  * GUID and the initiator port ID as the session name. This
1887                  * was the original behavior of the SRP target implementation
1888                  * (i.e. before the SRPT was included in OFED 1.3).
1889                  */
1890                 snprintf(ch->sess_name, sizeof(ch->sess_name),
1891                          "0x%016llx%016llx",
1892                          (unsigned long long)be64_to_cpu(*(u64 *)
1893                                 &sdev->port[param->port - 1].gid.raw[8]),
1894                          (unsigned long long)be64_to_cpu(*(u64 *)
1895                                 (ch->i_port_id + 8)));
1896         } else {
1897                 /*
1898                  * Default behavior: use the initiator port identifier as the
1899                  * session name.
1900                  */
1901                 snprintf(ch->sess_name, sizeof(ch->sess_name),
1902                          "0x%016llx%016llx",
1903                          (unsigned long long)be64_to_cpu(*(u64 *)ch->i_port_id),
1904                          (unsigned long long)be64_to_cpu(*(u64 *)
1905                                  (ch->i_port_id + 8)));
1906         }
1907
1908         TRACE_DBG("registering session %s", ch->sess_name);
1909
1910         BUG_ON(!sdev->scst_tgt);
1911         ch->scst_sess = scst_register_session(sdev->scst_tgt, 0, ch->sess_name,
1912                                               NULL, NULL);
1913         if (!ch->scst_sess) {
1914                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1915                 TRACE_DBG("%s", "Failed to create scst sess");
1916                 goto destroy_ib;
1917         }
1918
1919         TRACE_DBG("Establish connection sess=%p name=%s cm_id=%p",
1920                   ch->scst_sess, ch->sess_name, ch->cm_id);
1921
1922         scst_sess_set_tgt_priv(ch->scst_sess, ch);
1923
1924         /* create srp_login_response */
1925         rsp->opcode = SRP_LOGIN_RSP;
1926         rsp->tag = req->tag;
1927         rsp->max_it_iu_len = req->req_it_iu_len;
1928         rsp->max_ti_iu_len = req->req_it_iu_len;
1929         rsp->buf_fmt =
1930             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
1931         rsp->req_lim_delta = cpu_to_be32(SRPT_RQ_SIZE);
1932         atomic_set(&ch->req_lim_delta, 0);
1933
1934         /* create cm reply */
1935         rep_param->qp_num = ch->qp->qp_num;
1936         rep_param->private_data = (void *)rsp;
1937         rep_param->private_data_len = sizeof *rsp;
1938         rep_param->rnr_retry_count = 7;
1939         rep_param->flow_control = 1;
1940         rep_param->failover_accepted = 0;
1941         rep_param->srq = 1;
1942         rep_param->responder_resources = 4;
1943         rep_param->initiator_depth = 4;
1944
1945         ret = ib_send_cm_rep(cm_id, rep_param);
1946         if (ret) {
1947                 PRINT_ERROR("sending SRP_LOGIN_REQ response failed"
1948                             " (error code = %d)", ret);
1949                 goto release_channel;
1950         }
1951
1952         spin_lock_irq(&sdev->spinlock);
1953         list_add_tail(&ch->list, &sdev->rch_list);
1954         spin_unlock_irq(&sdev->spinlock);
1955
1956         goto out;
1957
1958 release_channel:
1959         scst_unregister_session(ch->scst_sess, 0, NULL);
1960         ch->scst_sess = NULL;
1961
1962 destroy_ib:
1963         ib_destroy_qp(ch->qp);
1964         ib_destroy_cq(ch->cq);
1965
1966 free_ch:
1967         kfree(ch);
1968
1969 reject:
1970         rej->opcode = SRP_LOGIN_REJ;
1971         rej->tag = req->tag;
1972         rej->buf_fmt =
1973             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
1974
1975         ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
1976                              (void *)rej, sizeof *rej);
1977
1978 out:
1979         kfree(rep_param);
1980         kfree(rsp);
1981         kfree(rej);
1982
1983         return ret;
1984 }
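/*
 * Added commentary: a successful login hence follows the usual IB CM
 * handshake. The SRP_LOGIN_REQ arrives as CM REQ private data, the target
 * answers with a CM REP carrying the SRP_LOGIN_RSP, and the channel only
 * becomes usable once the initiator's RTU has been processed by
 * srpt_cm_rtu_recv() below.
 */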
1985
1986 /**
1987  * Release the channel with the specified cm_id.
1988  *
1989  * Note: srpt_release_channel() is invoked with destroy_cmid == 0 because the
1990  * caller of srpt_cm_handler() destroys the cm_id itself when that handler
1991  * returns a non-zero value for the events that lead here.
1992  */
1992 static void srpt_find_and_release_channel(struct ib_cm_id *cm_id)
1993 {
1994         struct srpt_rdma_ch *ch;
1995
1996         ch = srpt_find_channel(cm_id, true);
1997         if (ch)
1998                 srpt_release_channel(ch, 0);
1999 }
2000
2001 static void srpt_cm_rej_recv(struct ib_cm_id *cm_id)
2002 {
2003         PRINT_INFO("%s", "Received InfiniBand REJ packet.");
2004         srpt_find_and_release_channel(cm_id);
2005 }
2006
2007 /**
2008  * Process an IB_CM_RTU_RECEIVED or IB_CM_USER_ESTABLISHED event.
2009  *
2010  * An IB_CM_RTU_RECEIVED message indicates that the connection is established
2011  * and that the recipient may begin transmitting (RTU = ready to use).
2012  */
2013 static int srpt_cm_rtu_recv(struct ib_cm_id *cm_id)
2014 {
2015         struct srpt_rdma_ch *ch;
2016         int ret;
2017
2018         ch = srpt_find_channel(cm_id, false);
2019         if (!ch)
2020                 return -EINVAL;
2021
2022         if (srpt_test_and_set_channel_state(ch, RDMA_CHANNEL_CONNECTING,
2023                                             RDMA_CHANNEL_LIVE)) {
2024                 struct srpt_ioctx *ioctx, *ioctx_tmp;
2025
2026                 ret = srpt_ch_qp_rts(ch, ch->qp);
2027
2028                 list_for_each_entry_safe(ioctx, ioctx_tmp, &ch->cmd_wait_list,
2029                                          wait_list) {
2030                         list_del(&ioctx->wait_list);
2031                         srpt_handle_new_iu(ch, ioctx);
2032                 }
2033                 if (ret && srpt_test_and_set_channel_state(ch,
2034                                         RDMA_CHANNEL_LIVE,
2035                                         RDMA_CHANNEL_DISCONNECTING)) {
2036                         TRACE_DBG("cm_id=%p sess_name=%s state=%d",
2037                                   cm_id, ch->sess_name, ch->state);
2038                         ib_send_cm_dreq(ch->cm_id, NULL, 0);
2039                 }
2040         } else if (ch->state == RDMA_CHANNEL_DISCONNECTING) {
2041                 TRACE_DBG("cm_id=%p sess_name=%s state=%d",
2042                           cm_id, ch->sess_name, ch->state);
2043                 ib_send_cm_dreq(ch->cm_id, NULL, 0);
2044                 ret = -EAGAIN;
2045         } else
2046                 ret = 0;
2047
2048         return ret;
2049 }
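/*
 * Added commentary: the channel state transitions driven above are, in
 * simplified form,
 *
 *   RDMA_CHANNEL_CONNECTING --(RTU received)--> RDMA_CHANNEL_LIVE
 *   RDMA_CHANNEL_LIVE --(DREQ or relogin)--> RDMA_CHANNEL_DISCONNECTING
 *
 * Commands that arrive while the channel is still CONNECTING are parked on
 * ch->cmd_wait_list by srpt_handle_new_iu() and replayed here once the
 * channel goes LIVE.
 */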
2050
2051 static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
2052 {
2053         PRINT_INFO("%s", "Received InfiniBand TimeWait exit.");
2054         srpt_find_and_release_channel(cm_id);
2055 }
2056
2057 static void srpt_cm_rep_error(struct ib_cm_id *cm_id)
2058 {
2059         PRINT_INFO("%s", "Received InfiniBand REP error.");
2060         srpt_find_and_release_channel(cm_id);
2061 }
2062
2063 static int srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
2064 {
2065         struct srpt_rdma_ch *ch;
2066
2067         ch = srpt_find_channel(cm_id, false);
2068         if (!ch)
2069                 return -EINVAL;
2070
2071         TRACE_DBG("%s: cm_id= %p ch->state= %d",
2072                  __func__, cm_id, ch->state);
2073
2074         switch (ch->state) {
2075         case RDMA_CHANNEL_LIVE:
2076         case RDMA_CHANNEL_CONNECTING:
2077                 ib_send_cm_drep(ch->cm_id, NULL, 0);
2078                 PRINT_INFO("Received DREQ and sent DREP for session %s.",
2079                            ch->sess_name);
2080                 break;
2081         case RDMA_CHANNEL_DISCONNECTING:
2082         default:
2083                 break;
2084         }
2085
2086         return 0;
2087 }
2088
2089 static void srpt_cm_drep_recv(struct ib_cm_id *cm_id)
2090 {
2091         PRINT_INFO("%s", "Received InfiniBand DREP message.");
2092         srpt_find_and_release_channel(cm_id);
2093 }
2094
2095 /**
2096  * IB connection manager callback function.
2097  *
2098  * A non-zero return value will make the caller destroy the CM ID.
2099  *
2100  * Note: srpt_add_one passes a struct srpt_device* as the third argument to
2101  * the ib_create_cm_id() call.
2102  */
2103 static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2104 {
2105         int ret = 0;
2106
2107         switch (event->event) {
2108         case IB_CM_REQ_RECEIVED:
2109                 ret = srpt_cm_req_recv(cm_id, &event->param.req_rcvd,
2110                                        event->private_data);
2111                 break;
2112         case IB_CM_REJ_RECEIVED:
2113                 srpt_cm_rej_recv(cm_id);
2114                 ret = -EINVAL;
2115                 break;
2116         case IB_CM_RTU_RECEIVED:
2117         case IB_CM_USER_ESTABLISHED:
2118                 ret = srpt_cm_rtu_recv(cm_id);
2119                 break;
2120         case IB_CM_DREQ_RECEIVED:
2121                 ret = srpt_cm_dreq_recv(cm_id);
2122                 break;
2123         case IB_CM_DREP_RECEIVED:
2124                 srpt_cm_drep_recv(cm_id);
2125                 ret = -EINVAL;
2126                 break;
2127         case IB_CM_TIMEWAIT_EXIT:
2128                 srpt_cm_timewait_exit(cm_id);
2129                 ret = -EINVAL;
2130                 break;
2131         case IB_CM_REP_ERROR:
2132                 srpt_cm_rep_error(cm_id);
2133                 ret = -EINVAL;
2134                 break;
2135         default:
2136                 break;
2137         }
2138
2139         return ret;
2140 }
2141
2142 static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
2143                                  struct srpt_ioctx *ioctx,
2144                                  struct scst_cmd *scmnd)
2145 {
2146         struct scatterlist *scat;
2147         scst_data_direction dir;
2148         struct rdma_iu *riu;
2149         struct srp_direct_buf *db;
2150         dma_addr_t dma_addr;
2151         struct ib_sge *sge;
2152         u64 raddr;
2153         u32 rsize;
2154         u32 tsize;
2155         u32 dma_len;
2156         int count, nrdma;
2157         int i, j, k;
2158
2159         scat = scst_cmd_get_sg(scmnd);
2160         dir = scst_cmd_get_data_direction(scmnd);
2161         WARN_ON(scat == NULL);
2162         count = ib_dma_map_sg(ch->sport->sdev->device, scat,
2163                               scst_cmd_get_sg_cnt(scmnd),
2164                               scst_to_tgt_dma_dir(dir));
2165         if (unlikely(!count))
2166                 return -EBUSY;
2167
2168         if (ioctx->rdma_ius && ioctx->n_rdma_ius)
2169                 nrdma = ioctx->n_rdma_ius;
2170         else {
2171                 nrdma = count / SRPT_DEF_SG_PER_WQE + ioctx->n_rbuf;
2172
2173                 ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu,
2174                                           scst_cmd_atomic(scmnd)
2175                                           ? GFP_ATOMIC : GFP_KERNEL);
2176                 if (!ioctx->rdma_ius) {
2177                         WARN_ON(scat == NULL);
2178                         ib_dma_unmap_sg(ch->sport->sdev->device,
2179                                         scat, scst_cmd_get_sg_cnt(scmnd),
2180                                         scst_to_tgt_dma_dir(dir));
2181                         return -ENOMEM;
2182                 }
2183
2184                 ioctx->n_rdma_ius = nrdma;
2185         }
2186
2187         db = ioctx->rbufs;
2188         tsize = (dir == SCST_DATA_READ) ?
2189                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
2190         dma_len = sg_dma_len(&scat[0]);
2191         riu = ioctx->rdma_ius;
2192
2193         /*
2194          * For each remote descriptor, compute how many ib_sge entries are
2195          * needed. If at most SRPT_DEF_SG_PER_WQE ib_sge entries are needed
2196          * per RDMA operation, a single rdma_iu (i.e. one RDMA work request)
2197          * per remote descriptor suffices; otherwise extra rdma_iu entries
2198          * are allocated to carry the excess ib_sge entries in additional
2199          * RDMA work requests.
2200          */
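        /*
         * Worked example (added, with made-up numbers): with count == 3
         * mapped SG entries of 4 KiB each, one remote descriptor of 12 KiB
         * and SRPT_DEF_SG_PER_WQE == 2, the pass below yields two rdma_iu
         * entries: the first carries two ib_sge entries (8 KiB) and the
         * second the remaining ib_sge (4 KiB), i.e. two RDMA work requests
         * for a single remote descriptor.
         */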
2201         for (i = 0, j = 0;
2202              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
2203                 rsize = be32_to_cpu(db->len);
2204                 raddr = be64_to_cpu(db->va);
2205                 riu->raddr = raddr;
2206                 riu->rkey = be32_to_cpu(db->key);
2207                 riu->sge_cnt = 0;
2208
2209                 /* calculate how many sge required for this remote_buf */
2210                 while (rsize > 0 && tsize > 0) {
2211
2212                         if (rsize >= dma_len) {
2213                                 tsize -= dma_len;
2214                                 rsize -= dma_len;
2215                                 raddr += dma_len;
2216
2217                                 if (tsize > 0) {
2218                                         ++j;
2219                                         if (j < count)
2220                                                 dma_len = sg_dma_len(&scat[j]);
2221                                 }
2222                         } else {
2223                                 tsize -= rsize;
2224                                 dma_len -= rsize;
2225                                 rsize = 0;
2226                         }
2227
2228                         ++riu->sge_cnt;
2229
2230                         if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) {
2231                                 riu->sge =
2232                                     kmalloc(riu->sge_cnt * sizeof *riu->sge,
2233                                             scst_cmd_atomic(scmnd)
2234                                             ? GFP_ATOMIC : GFP_KERNEL);
2235                                 if (!riu->sge)
2236                                         goto free_mem;
2237
2238                                 ++ioctx->n_rdma;
2239                                 ++riu;
2240                                 riu->sge_cnt = 0;
2241                                 riu->raddr = raddr;
2242                                 riu->rkey = be32_to_cpu(db->key);
2243                         }
2244                 }
2245
2246                 riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge,
2247                                    scst_cmd_atomic(scmnd)
2248                                    ? GFP_ATOMIC : GFP_KERNEL);
2249
2250                 if (!riu->sge)
2251                         goto free_mem;
2252
2253                 ++ioctx->n_rdma;
2254         }
2255
2256         db = ioctx->rbufs;
2257         scat = scst_cmd_get_sg(scmnd);
2258         tsize = (dir == SCST_DATA_READ) ?
2259                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
2260         riu = ioctx->rdma_ius;
2261         dma_len = sg_dma_len(&scat[0]);
2262         dma_addr = sg_dma_address(&scat[0]);
2263
2264         /* The second pass maps the SG list addresses onto rdma_iu->ib_sge. */
2265         for (i = 0, j = 0;
2266              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
2267                 rsize = be32_to_cpu(db->len);
2268                 sge = riu->sge;
2269                 k = 0;
2270
2271                 while (rsize > 0 && tsize > 0) {
2272                         sge->addr = dma_addr;
2273                         sge->lkey = ch->sport->sdev->mr->lkey;
2274
2275                         if (rsize >= dma_len) {
2276                                 sge->length =
2277                                         (tsize < dma_len) ? tsize : dma_len;
2278                                 tsize -= dma_len;
2279                                 rsize -= dma_len;
2280
2281                                 if (tsize > 0) {
2282                                         ++j;
2283                                         if (j < count) {
2284                                                 dma_len = sg_dma_len(&scat[j]);
2285                                                 dma_addr =
2286                                                     sg_dma_address(&scat[j]);
2287                                         }
2288                                 }
2289                         } else {
2290                                 sge->length = (tsize < rsize) ? tsize : rsize;
2291                                 tsize -= rsize;
2292                                 dma_len -= rsize;
2293                                 dma_addr += rsize;
2294                                 rsize = 0;
2295                         }
2296
2297                         ++k;
2298                         if (k == riu->sge_cnt && rsize > 0) {
2299                                 ++riu;
2300                                 sge = riu->sge;
2301                                 k = 0;
2302                         } else if (rsize > 0)
2303                                 ++sge;
2304                 }
2305         }
2306
2307         return 0;
2308
2309 free_mem:
2310         while (ioctx->n_rdma)
2311                 kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge);
2312
2313         kfree(ioctx->rdma_ius);
2314
2315         WARN_ON(scat == NULL);
2316         ib_dma_unmap_sg(ch->sport->sdev->device,
2317                         scat, scst_cmd_get_sg_cnt(scmnd),
2318                         scst_to_tgt_dma_dir(dir));
2319
2320         return -ENOMEM;
2321 }
2322
2323 static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
2324                               scst_data_direction dir)
2325 {
2326         struct ib_send_wr wr;
2327         struct ib_send_wr *bad_wr;
2328         struct rdma_iu *riu;
2329         int i;
2330         int ret = 0;
2331
2332         riu = ioctx->rdma_ius;
2333         memset(&wr, 0, sizeof wr);
2334
2335         for (i = 0; i < ioctx->n_rdma; ++i, ++riu) {
2336                 wr.opcode = (dir == SCST_DATA_READ) ?
2337                     IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
2338                 wr.next = NULL;
2339                 wr.wr_id = ioctx->index;
2340                 wr.wr.rdma.remote_addr = riu->raddr;
2341                 wr.wr.rdma.rkey = riu->rkey;
2342                 wr.num_sge = riu->sge_cnt;
2343                 wr.sg_list = riu->sge;
2344
2345                 /* Request a completion only for the last RDMA wr of a write command. */
2346                 if (i == (ioctx->n_rdma - 1) && dir == SCST_DATA_WRITE)
2347                         wr.send_flags = IB_SEND_SIGNALED;
2348
2349                 ret = ib_post_send(ch->qp, &wr, &bad_wr);
2350                 if (ret)
2351                         break;
2352         }
2353
2354         return ret;
2355 }
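/*
 * Added commentary: leaving all but the last work request unsignaled is safe
 * because work requests on a reliable connection (RC) queue pair complete in
 * order; one signaled completion therefore implies that every preceding RDMA
 * operation posted on the same queue pair has completed as well.
 */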
2356
2357 /*
2358  * Start data transfer between initiator and target. Must not block.
2359  */
2360 static int srpt_xfer_data(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
2361                           struct scst_cmd *scmnd)
2362 {
2363         int ret;
2364
2365         ret = srpt_map_sg_to_ib_sge(ch, ioctx, scmnd);
2366         if (ret) {
2367                 PRINT_ERROR("%s[%d] ret=%d", __func__, __LINE__, ret);
2368                 ret = SCST_TGT_RES_QUEUE_FULL;
2369                 goto out;
2370         }
2371
2372         ret = srpt_perform_rdmas(ch, ioctx, scst_cmd_get_data_direction(scmnd));
2373         if (ret) {
2374                 PRINT_ERROR("%s[%d] ret=%d", __func__, __LINE__, ret);
2375                 if (ret == -EAGAIN || ret == -ENOMEM)
2376                         ret = SCST_TGT_RES_QUEUE_FULL;
2377                 else
2378                         ret = SCST_TGT_RES_FATAL_ERROR;
2379                 goto out;
2380         }
2381
2382         ret = SCST_TGT_RES_SUCCESS;
2383
2384 out:
2385         return ret;
2386 }
2387
2388 /*
2389  * Called by the SCST core to inform ib_srpt that data reception from the
2390  * initiator should start (SCST_DATA_WRITE). Must not block.
2391  */
2392 static int srpt_rdy_to_xfer(struct scst_cmd *scmnd)
2393 {
2394         struct srpt_rdma_ch *ch;
2395         struct srpt_ioctx *ioctx;
2396
2397         ioctx = scst_cmd_get_tgt_priv(scmnd);
2398         BUG_ON(!ioctx);
2399
2400         if (srpt_get_cmd_state(ioctx) == SRPT_STATE_ABORTED) {
2401                 TRACE_DBG("cmd with tag %lld has been aborted",
2402                           scst_cmd_get_tag(scmnd));
2403                 return SCST_TGT_RES_FATAL_ERROR;
2404         }
2405
2406         ch = ioctx->ch;
2407         WARN_ON(ch != scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd)));
2408         BUG_ON(!ch);
2409
2410         if (ch->state == RDMA_CHANNEL_DISCONNECTING) {
2411                 TRACE_DBG("cmd with tag %lld: channel disconnecting",
2412                           scst_cmd_get_tag(scmnd));
2413                 return SCST_TGT_RES_FATAL_ERROR;
2414         } else if (ch->state == RDMA_CHANNEL_CONNECTING)
2415                 return SCST_TGT_RES_QUEUE_FULL;
2416
2417         srpt_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA);
2418
2419         return srpt_xfer_data(ch, ioctx, scmnd);
2420 }
2421
2422 /*
2423  * Called by the SCST core. Transmits the response buffer and status held in
2424  * 'scmnd'. Must not block.
2425  */
2426 static int srpt_xmit_response(struct scst_cmd *scmnd)
2427 {
2428         struct srpt_rdma_ch *ch;
2429         struct srpt_ioctx *ioctx;
2430         struct srp_rsp *srp_rsp;
2431         u64 tag;
2432         int ret = SCST_TGT_RES_SUCCESS;
2433         int dir;
2434         int status;
2435
2436         ioctx = scst_cmd_get_tgt_priv(scmnd);
2437         BUG_ON(!ioctx);
2438
2439         if (srpt_get_cmd_state(ioctx) == SRPT_STATE_ABORTED) {
2440                 TRACE_DBG("cmd with tag %lld has been aborted",
2441                           scst_cmd_get_tag(scmnd));
2442                 ret = SCST_TGT_RES_FATAL_ERROR;
2443                 goto out;
2444         }
2445
2446         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2447         BUG_ON(!ch);
2448
2449         tag = scst_cmd_get_tag(scmnd);
2450
2451         srpt_set_cmd_state(ioctx, SRPT_STATE_PROCESSED);
2452
2453         if (ch->state != RDMA_CHANNEL_LIVE) {
2454                 PRINT_ERROR("%s: tag= %lld channel in bad state %d",
2455                        __func__, (unsigned long long)tag, ch->state);
2456
2457                 if (ch->state == RDMA_CHANNEL_DISCONNECTING) {
2458                         TRACE_DBG("cmd with tag %lld: channel disconnecting",
2459                                   (unsigned long long)tag);
2460                         ret = SCST_TGT_RES_FATAL_ERROR;
2461                 } else if (ch->state == RDMA_CHANNEL_CONNECTING)
2462                         ret = SCST_TGT_RES_QUEUE_FULL;
2463
2464                 if (unlikely(scst_cmd_aborted(scmnd)))
2465                         goto out_aborted;
2466
2467                 goto out;
2468         }
2469
2470         ib_dma_sync_single_for_cpu(ch->sport->sdev->device, ioctx->dma,
2471                                    MAX_MESSAGE_SIZE, DMA_TO_DEVICE);
2472
2473         srp_rsp = ioctx->buf;
2474
2475         if (unlikely(scst_cmd_aborted(scmnd))) {
2476                 TRACE_MGMT_DBG("%s: tag= %lld already got aborted",
2477                                __func__, (unsigned long long)tag);
2478                 goto out_aborted;
2479         }
2480
2481         dir = scst_cmd_get_data_direction(scmnd);
2482         status = scst_cmd_get_status(scmnd) & 0xff;
2483
2484         srpt_build_cmd_rsp(ch, ioctx, NO_SENSE, NO_ADD_SENSE, tag);
2485
2486         if (SCST_SENSE_VALID(scst_cmd_get_sense_buffer(scmnd))) {
2487                 srp_rsp->sense_data_len = scst_cmd_get_sense_buffer_len(scmnd);
2488                 if (srp_rsp->sense_data_len >
2489                     (MAX_MESSAGE_SIZE - sizeof *srp_rsp))
2490                         srp_rsp->sense_data_len =
2491                             MAX_MESSAGE_SIZE - sizeof *srp_rsp;
2492
2493                 memcpy((u8 *) (srp_rsp + 1), scst_cmd_get_sense_buffer(scmnd),
2494                        srp_rsp->sense_data_len);
2495
2496                 srp_rsp->sense_data_len = cpu_to_be32(srp_rsp->sense_data_len);
2497                 srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
2498
2499                 if (!status)
2500                         status = SAM_STAT_CHECK_CONDITION;
2501         }
2502
2503         srp_rsp->status = status;
2504
2505         /* For read commands, transfer the data to the initiator. */
2506         if (dir == SCST_DATA_READ && scst_cmd_get_resp_data_len(scmnd)) {
2507                 ret = srpt_xfer_data(ch, ioctx, scmnd);
2508                 if (ret != SCST_TGT_RES_SUCCESS) {
2509                         PRINT_ERROR("%s: tag= %lld xfer_data failed",
2510                                     __func__, (unsigned long long)tag);
2511                         goto out;
2512                 }
2513         }
2514
2515         if (srpt_post_send(ch, ioctx,
2516                            sizeof *srp_rsp +
2517                            be32_to_cpu(srp_rsp->sense_data_len))) {
2518                 PRINT_ERROR("%s: ch->state= %d tag= %lld",
2519                             __func__, ch->state,
2520                             (unsigned long long)tag);
2521                 ret = SCST_TGT_RES_FATAL_ERROR;
2522         }
2523
2524 out:
2525         return ret;
2526
2527 out_aborted:
2528         ret = SCST_TGT_RES_SUCCESS;
2529         scst_set_delivery_status(scmnd, SCST_CMD_DELIVERY_ABORTED);
2530         srpt_set_cmd_state(ioctx, SRPT_STATE_ABORTED);
2531         WARN_ON(scmnd->state != SCST_CMD_STATE_XMIT_WAIT);
2532         scst_tgt_cmd_done(scmnd, SCST_CONTEXT_SAME);
2533         goto out;
2534 }
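/*
 * Added commentary: in the SRP_RSP built above, sense data (when present)
 * immediately follows the fixed-size struct srp_rsp header. That is why the
 * response is posted with length
 *
 *      sizeof(*srp_rsp) + be32_to_cpu(srp_rsp->sense_data_len)
 *
 * and why the sense buffer is truncated to MAX_MESSAGE_SIZE - sizeof(*srp_rsp)
 * bytes: header plus trailing sense data must fit in a single IU buffer.
 */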
2535
2536 /*
2537  * Called by the SCST core to inform ib_srpt that a received task management
2538  * function has been completed. Must not block.
2539  */
2540 static void srpt_tsk_mgmt_done(struct scst_mgmt_cmd *mcmnd)
2541 {
2542         struct srpt_rdma_ch *ch;
2543         struct srpt_mgmt_ioctx *mgmt_ioctx;
2544         struct srpt_ioctx *ioctx;
2545         int rsp_len;
2546
2547         mgmt_ioctx = scst_mgmt_cmd_get_tgt_priv(mcmnd);
2548         BUG_ON(!mgmt_ioctx);
2549
2550         ch = mgmt_ioctx->ch;
2551         BUG_ON(!ch);
2552
2553         ioctx = mgmt_ioctx->ioctx;
2554         BUG_ON(!ioctx);
2555
2556         TRACE_DBG("%s: tsk_mgmt_done for tag= %lld status=%d",
2557                   __func__, (unsigned long long)mgmt_ioctx->tag,
2558                   scst_mgmt_cmd_get_status(mcmnd));
2559
2560         srpt_set_cmd_state(ioctx, SRPT_STATE_PROCESSED);
2561
2562         rsp_len = srpt_build_tskmgmt_rsp(ch, ioctx,
2563                                          (scst_mgmt_cmd_get_status(mcmnd) ==
2564                                           SCST_MGMT_STATUS_SUCCESS) ?
2565                                          SRP_TSK_MGMT_SUCCESS :
2566                                          SRP_TSK_MGMT_FAILED,
2567                                          mgmt_ioctx->tag);
2568         srpt_post_send(ch, ioctx, rsp_len);
2569
2570         scst_mgmt_cmd_set_tgt_priv(mcmnd, NULL);
2571
2572         kfree(mgmt_ioctx);
2573 }
2574
2575 /*
2576  * Called by the SCST core to inform ib_srpt that the command 'scmnd' is about
2577  * to be freed. May be called in IRQ context.
2578  */
2579 static void srpt_on_free_cmd(struct scst_cmd *scmnd)
2580 {
2581         struct srpt_rdma_ch *ch;
2582         struct srpt_ioctx *ioctx;
2583
2584         ioctx = scst_cmd_get_tgt_priv(scmnd);
2585         BUG_ON(!ioctx);
2586
2587         ch = ioctx->ch;
2588         if (ch) {
2589                 spin_lock_irq(&ch->spinlock);
2590                 list_del(&ioctx->scmnd_list);
2591                 ch->active_scmnd_cnt--;
2592                 spin_unlock_irq(&ch->spinlock);
2593                 ioctx->ch = NULL;
2594         }
2595
2596         srpt_reset_ioctx(ch, ioctx);
2597         scst_cmd_set_tgt_priv(scmnd, NULL);
2598 }
2599
2600 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2601 /* A vanilla 2.6.19 or older kernel without backported OFED kernel headers. */
2602 static void srpt_refresh_port_work(void *ctx)
2603 #else
2604 static void srpt_refresh_port_work(struct work_struct *work)
2605 #endif
2606 {
2607 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2608         struct srpt_port *sport = (struct srpt_port *)ctx;
2609 #else
2610         struct srpt_port *sport = container_of(work, struct srpt_port, work);
2611 #endif
2612
2613         srpt_refresh_port(sport);
2614 }
2615
2616 /*
2617  * Called by the SCST core to detect target adapters. Returns the number of
2618  * detected target adapters.
2619  */
2620 static int srpt_detect(struct scst_tgt_template *tp)
2621 {
2622         int device_count;
2623
2624         TRACE_ENTRY();
2625
2626         device_count = atomic_read(&srpt_device_count);
2627
2628         TRACE_EXIT_RES(device_count);
2629
2630         return device_count;
2631 }
2632
2633 /*
2634  * Callback function called by the SCST core from scst_unregister() to free up
2635  * the resources associated with device scst_tgt.
2636  */
2637 static int srpt_release(struct scst_tgt *scst_tgt)
2638 {
2639         struct srpt_device *sdev = scst_tgt_get_tgt_priv(scst_tgt);
2640         struct srpt_rdma_ch *ch, *tmp_ch;
2641
2642         TRACE_ENTRY();
2643
2644         BUG_ON(!scst_tgt);
2645 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
2646         WARN_ON(!sdev);
2647         if (!sdev)
2648                 return -ENODEV;
2649 #else
2650         if (WARN_ON(!sdev))
2651                 return -ENODEV;
2652 #endif
2653
2654 #ifdef CONFIG_SCST_PROC
2655         srpt_unregister_procfs_entry(scst_tgt->tgtt);
2656 #endif /*CONFIG_SCST_PROC*/
2657
2658         spin_lock_irq(&sdev->spinlock);
2659         list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
2660                 list_del(&ch->list);
2661                 spin_unlock_irq(&sdev->spinlock);
2662                 srpt_release_channel(ch, 1);
2663                 spin_lock_irq(&sdev->spinlock);
2664         }
2665         spin_unlock_irq(&sdev->spinlock);
2666
2667         srpt_unregister_mad_agent(sdev);
2668
2669         scst_tgt_set_tgt_priv(scst_tgt, NULL);
2670
2671         TRACE_EXIT();
2672
2673         return 0;
2674 }
2675
2676 /*
2677  * Entry point for ib_srpt's kernel thread. This kernel thread is only created
2678  * when the module parameter 'thread' is not zero (the default is zero).
2679  * This thread processes the ioctx list srpt_thread.thread_ioctx_list.
2680  *
2681  * @pre thread != 0
2682  */
2683 static int srpt_ioctx_thread(void *arg)
2684 {
2685         struct srpt_ioctx *ioctx;
2686
2687         /* Hibernation / freezing of the SRPT kernel thread is not supported. */
2688         current->flags |= PF_NOFREEZE;
2689
2690         spin_lock_irq(&srpt_thread.thread_lock);
2691         while (!kthread_should_stop()) {
2692                 wait_queue_t wait;
2693                 init_waitqueue_entry(&wait, current);
2694
2695                 if (!srpt_test_ioctx_list()) {
2696                         add_wait_queue_exclusive(&ioctx_list_waitQ, &wait);
2697
2698                         for (;;) {
2699                                 set_current_state(TASK_INTERRUPTIBLE);
2700                                 if (srpt_test_ioctx_list())
2701                                         break;
2702                                 spin_unlock_irq(&srpt_thread.thread_lock);
2703                                 schedule();
2704                                 spin_lock_irq(&srpt_thread.thread_lock);
2705                         }
2706                         set_current_state(TASK_RUNNING);
2707                         remove_wait_queue(&ioctx_list_waitQ, &wait);
2708                 }
2709
2710                 while (!list_empty(&srpt_thread.thread_ioctx_list)) {
2711                         ioctx = list_entry(srpt_thread.thread_ioctx_list.next,
2712                                            struct srpt_ioctx, comp_list);
2713
2714                         list_del(&ioctx->comp_list);
2715
2716                         spin_unlock_irq(&srpt_thread.thread_lock);
2717                         switch (ioctx->op) {
2718                         case IB_WC_SEND:
2719                                 srpt_handle_send_comp(ioctx->ch, ioctx,
2720                                         SCST_CONTEXT_DIRECT);
2721                                 break;
2722                         case IB_WC_RDMA_WRITE:
2723                         case IB_WC_RDMA_READ:
2724                                 srpt_handle_rdma_comp(ioctx->ch, ioctx);
2725                                 break;
2726                         case IB_WC_RECV:
2727                                 srpt_handle_new_iu(ioctx->ch, ioctx);
2728                                 break;
2729                         default:
2730                                 break;
2731                         }
2732 #if defined(CONFIG_SCST_DEBUG)
2733                         if (thread_processing_delay_in_us
2734                             <= MAX_UDELAY_MS * 1000)
2735                                 udelay(thread_processing_delay_in_us);
2736 #endif
2737                         spin_lock_irq(&srpt_thread.thread_lock);
2738                 }
2739         }
2740         spin_unlock_irq(&srpt_thread.thread_lock);
2741
2742         return 0;
2743 }
2744
2745 /* SCST target template for the SRP target implementation. */
2746 static struct scst_tgt_template srpt_template = {
2747         .name = DRV_NAME,
2748         .sg_tablesize = SRPT_DEF_SG_TABLESIZE,
2749         .xmit_response_atomic = 1,
2750         .rdy_to_xfer_atomic = 1,
2751         .detect = srpt_detect,
2752         .release = srpt_release,
2753         .xmit_response = srpt_xmit_response,
2754         .rdy_to_xfer = srpt_rdy_to_xfer,
2755         .on_free_cmd = srpt_on_free_cmd,
2756         .task_mgmt_fn_done = srpt_tsk_mgmt_done
2757 };
2758
2759 /*
2760  * The callback function srpt_release_class_dev() is called whenever a
2761  * device is removed from the /sys/class/infiniband_srpt device class.
2762  */
2763 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2764 static void srpt_release_class_dev(struct class_device *class_dev)
2765 #else
2766 static void srpt_release_class_dev(struct device *dev)
2767 #endif
2768 {
2769 }
2770
2771 #ifdef CONFIG_SCST_PROC
2772
2773 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
2774 static int srpt_trace_level_show(struct seq_file *seq, void *v)
2775 {
2776         return scst_proc_log_entry_read(seq, trace_flag, NULL);
2777 }
2778
2779 static ssize_t srpt_proc_trace_level_write(struct file *file,
2780         const char __user *buf, size_t length, loff_t *off)
2781 {
2782         return scst_proc_log_entry_write(file, buf, length, &trace_flag,
2783                 DEFAULT_SRPT_TRACE_FLAGS, NULL);
2784 }
2785
2786 static struct scst_proc_data srpt_log_proc_data = {
2787         SCST_DEF_RW_SEQ_OP(srpt_proc_trace_level_write)
2788         .show = srpt_trace_level_show,
2789 };
2790 #endif
2791
2792 #endif /* CONFIG_SCST_PROC */
2793
2794 static struct class_attribute srpt_class_attrs[] = {
2795         __ATTR_NULL,
2796 };
2797
2798 static struct class srpt_class = {
2799         .name = "infiniband_srpt",
2800 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2801         .release = srpt_release_class_dev,
2802 #else
2803         .dev_release = srpt_release_class_dev,
2804 #endif
2805         .class_attrs = srpt_class_attrs,
2806 };
2807
2808 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2809 static ssize_t show_login_info(struct class_device *class_dev, char *buf)
2810 #else
2811 static ssize_t show_login_info(struct device *dev,
2812                                struct device_attribute *attr, char *buf)
2813 #endif
2814 {
2815         struct srpt_device *sdev =
2816 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2817                 container_of(class_dev, struct srpt_device, class_dev);
2818 #else
2819                 container_of(dev, struct srpt_device, dev);
2820 #endif
2821         struct srpt_port *sport;
2822         int i;
2823         int len = 0;
2824
2825         for (i = 0; i < sdev->device->phys_port_cnt; i++) {
2826                 sport = &sdev->port[i];
2827
2828                 len += sprintf(buf + len,
2829                                "tid_ext=%016llx,ioc_guid=%016llx,pkey=ffff,"
2830                                "dgid=%04x%04x%04x%04x%04x%04x%04x%04x,"
2831                                "service_id=%016llx\n",
2832                                (unsigned long long) srpt_service_guid,
2833                                (unsigned long long) srpt_service_guid,
2834                                be16_to_cpu(((__be16 *) sport->gid.raw)[0]),
2835                                be16_to_cpu(((__be16 *) sport->gid.raw)[1]),
2836                                be16_to_cpu(((__be16 *) sport->gid.raw)[2]),
2837                                be16_to_cpu(((__be16 *) sport->gid.raw)[3]),
2838                                be16_to_cpu(((__be16 *) sport->gid.raw)[4]),
2839                                be16_to_cpu(((__be16 *) sport->gid.raw)[5]),
2840                                be16_to_cpu(((__be16 *) sport->gid.raw)[6]),
2841                                be16_to_cpu(((__be16 *) sport->gid.raw)[7]),
2842                                (unsigned long long) srpt_service_guid);
2843         }
2844
2845         return len;
2846 }
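/*
 * Usage note (added; the parameter mapping is an assumption, verify against
 * the initiator in use): each line emitted above is shaped like the target
 * descriptions printed by ibsrpdm/srp_daemon, and its ioc_guid, dgid, pkey
 * and service_id values match parameters accepted by the ib_srp initiator's
 * add_target interface; tid_ext corresponds to the initiator's id_ext.
 */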
2847
2848 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2849 static CLASS_DEVICE_ATTR(login_info, S_IRUGO, show_login_info, NULL);
2850 #else
2851 static DEVICE_ATTR(login_info, S_IRUGO, show_login_info, NULL);
2852 #endif
2853
2854 /*
2855  * Callback function invoked by the InfiniBand core either when a new
2856  * InfiniBand device is added or, from within ib_register_client(), once for
2857  * each already registered InfiniBand device.
2858  */
static void srpt_add_one(struct ib_device *device)
{
        struct srpt_device *sdev;
        struct srpt_port *sport;
        struct ib_srq_init_attr srq_attr;
        int i;

        TRACE_ENTRY();

        TRACE_DBG("device = %p, device->dma_ops = %p", device, device->dma_ops);

        sdev = kzalloc(sizeof *sdev, GFP_KERNEL);
        if (!sdev)
                return;

        sdev->device = device;

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
        sdev->class_dev.class = &srpt_class;
        sdev->class_dev.dev = device->dma_device;
        snprintf(sdev->class_dev.class_id, BUS_ID_SIZE,
                 "srpt-%s", device->name);
#else
        sdev->dev.class = &srpt_class;
        sdev->dev.parent = device->dma_device;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30)
        snprintf(sdev->dev.bus_id, BUS_ID_SIZE, "srpt-%s", device->name);
#else
        dev_set_name(&sdev->dev, "srpt-%s", device->name);
#endif
#endif

#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
        if (class_device_register(&sdev->class_dev))
                goto free_dev;
        if (class_device_create_file(&sdev->class_dev,
                                     &class_device_attr_login_info))
                goto err_dev;
#else
        if (device_register(&sdev->dev))
                goto free_dev;
        if (device_create_file(&sdev->dev, &dev_attr_login_info))
                goto err_dev;
#endif

        if (ib_query_device(device, &sdev->dev_attr))
                goto err_dev;

        sdev->pd = ib_alloc_pd(device);
        if (IS_ERR(sdev->pd))
                goto err_dev;

        sdev->mr = ib_get_dma_mr(sdev->pd, IB_ACCESS_LOCAL_WRITE);
        if (IS_ERR(sdev->mr))
                goto err_pd;

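        /*
         * Set up a single shared receive queue (SRQ) for this HCA: receive
         * work requests for all channels are posted to this one queue, and
         * its size is clamped to what the device supports (max_srq_wr).
         */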
        srq_attr.event_handler = srpt_srq_event;
        srq_attr.srq_context = (void *)sdev;
        srq_attr.attr.max_wr = min(SRPT_SRQ_SIZE, sdev->dev_attr.max_srq_wr);
        srq_attr.attr.max_sge = 1;
        srq_attr.attr.srq_limit = 0;

        sdev->srq = ib_create_srq(sdev->pd, &srq_attr);
        if (IS_ERR(sdev->srq))
                goto err_mr;

        TRACE_DBG("%s: create SRQ #wr=%d max_allow=%d dev=%s",
                  __func__, srq_attr.attr.max_wr,
                  sdev->dev_attr.max_srq_wr, device->name);

        if (!srpt_service_guid)
                srpt_service_guid = be64_to_cpu(device->node_guid);

        sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev);
        if (IS_ERR(sdev->cm_id))
                goto err_srq;

        /* Print out the target login information. */
        TRACE_DBG("Target login info: id_ext=%016llx,"
                  "ioc_guid=%016llx,pkey=ffff,service_id=%016llx",
                  (unsigned long long) srpt_service_guid,
                  (unsigned long long) srpt_service_guid,
                  (unsigned long long) srpt_service_guid);

        /*
         * We do not have a consistent service_id (i.e. the id_ext part of
         * the target_id) to identify this target. We currently use the GUID
         * of the first HCA in the system as service_id; therefore the
         * target_id will change if this HCA fails and is replaced by a
         * different HCA.
         */
        if (ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid), 0, NULL))
                goto err_cm;

        INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device,
                              srpt_event_handler);
        if (ib_register_event_handler(&sdev->event_handler))
                goto err_cm;

        if (srpt_alloc_ioctx_ring(sdev))
                goto err_event;

        INIT_LIST_HEAD(&sdev->rch_list);
        spin_lock_init(&sdev->spinlock);

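        /*
         * Pre-post one receive work request per I/O context so that incoming
         * SRP information units can be received as soon as the first RDMA
         * channel has been established.
         */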
        for (i = 0; i < SRPT_SRQ_SIZE; ++i)
                srpt_post_recv(sdev, sdev->ioctx_ring[i]);

        ib_set_client_data(device, &srpt_client, sdev);

        sdev->scst_tgt = scst_register(&srpt_template, NULL);
        if (!sdev->scst_tgt) {
                PRINT_ERROR("SCST registration failed for %s.",
                            sdev->device->name);
                goto err_ring;
        }

        scst_tgt_set_tgt_priv(sdev->scst_tgt, sdev);

        WARN_ON(sdev->device->phys_port_cnt > ARRAY_SIZE(sdev->port));

        for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
                sport = &sdev->port[i - 1];
                sport->sdev = sdev;
                sport->port = i;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && !defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
                /*
                 * A vanilla 2.6.19 or older kernel without backported OFED
                 * kernel headers.
                 */
                INIT_WORK(&sport->work, srpt_refresh_port_work, sport);
#else
                INIT_WORK(&sport->work, srpt_refresh_port_work);
#endif
                if (srpt_refresh_port(sport)) {
                        PRINT_ERROR("MAD registration failed for %s-%d.",
                                    sdev->device->name, i);
                        goto err_refresh_port;
                }
        }

        atomic_inc(&srpt_device_count);

        TRACE_EXIT();

        return;

err_refresh_port:
        scst_unregister(sdev->scst_tgt);
err_ring:
        ib_set_client_data(device, &srpt_client, NULL);
        srpt_free_ioctx_ring(sdev);
err_event:
        ib_unregister_event_handler(&sdev->event_handler);
err_cm:
        ib_destroy_cm_id(sdev->cm_id);
err_srq:
        ib_destroy_srq(sdev->srq);
err_mr:
        ib_dereg_mr(sdev->mr);
err_pd:
        ib_dealloc_pd(sdev->pd);
err_dev:
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
        class_device_unregister(&sdev->class_dev);
#else
        device_unregister(&sdev->dev);
#endif
free_dev:
        kfree(sdev);

        TRACE_EXIT();
}

/*
 * Callback function invoked by the InfiniBand core either when an InfiniBand
 * device is removed or, during the ib_unregister_client() call, once for
 * each InfiniBand device for which srpt_add_one() was invoked earlier.
 */
static void srpt_remove_one(struct ib_device *device)
{
        int i;
        struct srpt_device *sdev;

        TRACE_ENTRY();

        sdev = ib_get_client_data(device, &srpt_client);
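        /*
         * In kernels up to and including 2.6.18, WARN_ON() is a statement
         * rather than an expression that returns the value of its argument,
         * so on those kernels it cannot be used directly inside the if()
         * condition below.
         */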
#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
        WARN_ON(!sdev);
        if (!sdev)
                return;
#else
        if (WARN_ON(!sdev))
                return;
#endif

        /*
         * Cancel the work if it is still queued, and wait until
         * srpt_refresh_port_work() has finished if it is already running.
         */
        for (i = 0; i < sdev->device->phys_port_cnt; i++) {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 22)
                cancel_work_sync(&sdev->port[i].work);
#else
                /*
                 * cancel_work_sync() was introduced in kernel 2.6.22. Older
                 * kernels do not have a facility to cancel scheduled work.
                 */
                PRINT_ERROR("%s",
                       "your kernel does not provide cancel_work_sync().");
#endif
        }

        scst_unregister(sdev->scst_tgt);
        sdev->scst_tgt = NULL;

        ib_unregister_event_handler(&sdev->event_handler);
        ib_destroy_cm_id(sdev->cm_id);
        ib_destroy_srq(sdev->srq);
        ib_dereg_mr(sdev->mr);
        ib_dealloc_pd(sdev->pd);
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
        class_device_unregister(&sdev->class_dev);
#else
        device_unregister(&sdev->dev);
#endif

        srpt_free_ioctx_ring(sdev);
        kfree(sdev);

        TRACE_EXIT();
}
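
/*
 * srpt_add_one() and srpt_remove_one() are hooked into the InfiniBand core
 * via an ib_client structure. A minimal sketch of what such a definition
 * looks like (the actual srpt_client definition lives elsewhere in this
 * file):
 *
 *   static struct ib_client srpt_client = {
 *           .name   = DRV_NAME,
 *           .add    = srpt_add_one,
 *           .remove = srpt_remove_one,
 *   };
 */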

#ifdef CONFIG_SCST_PROC

/**
 * srpt_register_procfs_entry() - Create procfs entries for srpt.
 *
 * Currently the only procfs entry created by this function is the
 * "trace_level" entry.
 */
static int srpt_register_procfs_entry(struct scst_tgt_template *tgt)
{
        int res = 0;
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
        struct proc_dir_entry *p, *root;

        root = scst_proc_get_tgt_root(tgt);
        WARN_ON(!root);
        if (root) {
                /*
                 * Fill in the scst_proc_data::data pointer, which is used in
                 * a printk(KERN_INFO ...) statement in
                 * scst_proc_log_entry_write() in scst_proc.c.
                 */
                srpt_log_proc_data.data = (char *)tgt->name;
                p = scst_create_proc_entry(root, SRPT_PROC_TRACE_LEVEL_NAME,
                                           &srpt_log_proc_data);
                if (!p)
                        res = -ENOMEM;
        } else {
                res = -ENOMEM;
        }
#endif
        return res;
}
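
/*
 * A sketch of how the resulting procfs entry can be used from user space,
 * assuming the SCST proc root path; the accepted keywords are defined by
 * scst_proc_log_entry_write() in scst_proc.c and may differ between SCST
 * versions:
 *
 *   cat /proc/scsi_tgt/ib_srpt/trace_level
 *   echo "add mgmt" >/proc/scsi_tgt/ib_srpt/trace_level
 */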

static void srpt_unregister_procfs_entry(struct scst_tgt_template *tgt)
{
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
        struct proc_dir_entry *root;

        root = scst_proc_get_tgt_root(tgt);
        WARN_ON(!root);
        if (root)
                remove_proc_entry(SRPT_PROC_TRACE_LEVEL_NAME, root);
#endif
}

#endif /* CONFIG_SCST_PROC */

/*
 * Module initialization.
 *
 * Note: since ib_register_client() registers callback functions, and since
 * at least one of these callback functions (srpt_add_one()) calls SCST
 * functions, the SCST target template must be registered before
 * ib_register_client() is called.
 */
static int __init srpt_init_module(void)
{
        int ret;

        ret = class_register(&srpt_class);
        if (ret) {
                PRINT_ERROR("%s", "couldn't register class ib_srpt");
                goto out;
        }

        ret = scst_register_target_template(&srpt_template);
        if (ret < 0) {
                PRINT_ERROR("%s", "couldn't register with scst");
                ret = -ENODEV;
                goto out_unregister_class;
        }

#ifdef CONFIG_SCST_PROC
        ret = srpt_register_procfs_entry(&srpt_template);
        if (ret) {
                PRINT_ERROR("%s", "couldn't register procfs entry");
                goto out_unregister_target;
        }
#endif /* CONFIG_SCST_PROC */

        ret = ib_register_client(&srpt_client);
        if (ret) {
                PRINT_ERROR("%s", "couldn't register IB client");
                goto out_unregister_target;
        }

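        /*
         * Optionally start a kernel thread that processes the I/O contexts
         * queued on srpt_thread.thread_ioctx_list (see the "thread" module
         * toggle defined elsewhere in this file). If the thread cannot be
         * started, clear the toggle so that I/O contexts are processed
         * without the helper thread instead.
         */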
        if (thread) {
                spin_lock_init(&srpt_thread.thread_lock);
                INIT_LIST_HEAD(&srpt_thread.thread_ioctx_list);
                srpt_thread.thread = kthread_run(srpt_ioctx_thread,
                                                 NULL, "srpt_thread");
                if (IS_ERR(srpt_thread.thread)) {
                        srpt_thread.thread = NULL;
                        thread = 0;
                }
        }

        return 0;

out_unregister_target:
#ifdef CONFIG_SCST_PROC
        /*
         * Note: the procfs entry is unregistered in srpt_release(), which is
         * called by scst_unregister_target_template().
         */
#endif /* CONFIG_SCST_PROC */
        scst_unregister_target_template(&srpt_template);
out_unregister_class:
        class_unregister(&srpt_class);
out:
        return ret;
}

static void __exit srpt_cleanup_module(void)
{
        TRACE_ENTRY();

        if (srpt_thread.thread)
                kthread_stop(srpt_thread.thread);
        ib_unregister_client(&srpt_client);
        scst_unregister_target_template(&srpt_template);
        class_unregister(&srpt_class);

        TRACE_EXIT();
}

module_init(srpt_init_module);
module_exit(srpt_cleanup_module);