srpt/src/ib_srpt.c
1 /*
2  * Copyright (c) 2006 - 2009 Mellanox Technology Inc.  All rights reserved.
3  * Copyright (C) 2008 Vladislav Bolkhovitin <vst@vlnb.net>
4  * Copyright (C) 2008 - 2009 Bart Van Assche <bart.vanassche@gmail.com>
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  *
34  */
35
36 #include <linux/module.h>
37 #include <linux/init.h>
38 #include <linux/slab.h>
39 #include <linux/err.h>
40 #include <linux/ctype.h>
41 #include <linux/string.h>
42 #include <linux/kthread.h>
43 #include <linux/delay.h>
44 #include <asm/atomic.h>
45 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
46 #include <linux/proc_fs.h>
47 #include <linux/seq_file.h>
48 #endif
49 #include "ib_srpt.h"
50 #define LOG_PREFIX "ib_srpt" /* Prefix for SCST tracing macros. */
51 #include "scst_debug.h"
52
53 #define CONFIG_SCST_PROC
54
55 /* Name of this kernel module. */
56 #define DRV_NAME                "ib_srpt"
57 #define DRV_VERSION             "1.0.1"
58 #define DRV_RELDATE             "July 10, 2008"
59 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
60 /* Flags to be used in SCST debug tracing statements. */
61 #define DEFAULT_SRPT_TRACE_FLAGS (TRACE_OUT_OF_MEM | TRACE_MINOR \
62                                   | TRACE_MGMT | TRACE_SPECIAL)
63 /* Name of the entry that will be created under /proc/scsi_tgt/ib_srpt. */
64 #define SRPT_PROC_TRACE_LEVEL_NAME      "trace_level"
65 #endif
66
67 #define MELLANOX_SRPT_ID_STRING "SCST SRP target"
68
69 MODULE_AUTHOR("Vu Pham");
70 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol target "
71                    "v" DRV_VERSION " (" DRV_RELDATE ")");
72 MODULE_LICENSE("Dual BSD/GPL");
73
74 struct srpt_thread {
75         /* Protects thread_ioctx_list. */
76         spinlock_t thread_lock;
77         /* I/O contexts to be processed by the kernel thread. */
78         struct list_head thread_ioctx_list;
79         /* SRPT kernel thread. */
80         struct task_struct *thread;
81 };
82
83 /*
84  * Global Variables
85  */
86
87 static u64 srpt_service_guid;
88 /* Number of srpt_device structures. */
89 static atomic_t srpt_device_count;
90 static int use_port_guid_in_session_name;
91 static int thread = 1;
92 static struct srpt_thread srpt_thread;
93 static DECLARE_WAIT_QUEUE_HEAD(ioctx_list_waitQ);
94 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
95 static unsigned long trace_flag = DEFAULT_SRPT_TRACE_FLAGS;
96 module_param(trace_flag, ulong, 0644);
97 MODULE_PARM_DESC(trace_flag,
98                  "Trace flags for the ib_srpt kernel module.");
99 #endif
100 #if defined(CONFIG_SCST_DEBUG)
101 static unsigned long interrupt_processing_delay_in_us;
102 module_param(interrupt_processing_delay_in_us, ulong, 0744);
103 MODULE_PARM_DESC(interrupt_processing_delay_in_us,
104                  "CQ completion handler interrupt delay in microseconds.");
105 static unsigned long thread_processing_delay_in_us;
106 module_param(thread_processing_delay_in_us, ulong, 0744);
107 MODULE_PARM_DESC(thread_processing_delay_in_us,
108                  "SRP thread processing delay in microseconds.");
109 #endif
110
111 module_param(thread, int, 0444);
112 MODULE_PARM_DESC(thread,
113                  "Execute ioctx in thread context. Default 1; set it to 0 "
114                  "to process in soft IRQ context where possible.");
115
116 static unsigned int srp_max_rdma_size = DEFAULT_MAX_RDMA_SIZE;
117 module_param(srp_max_rdma_size, uint, 0744);
118 MODULE_PARM_DESC(srp_max_rdma_size,
119                  "Maximum size of SRP RDMA transfers for new connections.");
120
121 static unsigned int srp_max_message_size = DEFAULT_MAX_MESSAGE_SIZE;
122 module_param(srp_max_message_size, uint, 0444);
123 MODULE_PARM_DESC(srp_max_message_size,
124                  "Maximum size of SRP control messages in bytes.");
125
126 module_param(use_port_guid_in_session_name, bool, 0444);
127 MODULE_PARM_DESC(use_port_guid_in_session_name,
128                  "Use target port ID in the SCST session name such that"
129                  " redundant paths between multiport systems can be masked.");
130
131 static void srpt_add_one(struct ib_device *device);
132 static void srpt_remove_one(struct ib_device *device);
133 static void srpt_unregister_mad_agent(struct srpt_device *sdev);
134 #ifdef CONFIG_SCST_PROC
135 static void srpt_unregister_procfs_entry(struct scst_tgt_template *tgt);
136 #endif /*CONFIG_SCST_PROC*/
137 static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
138                                     struct srpt_ioctx *ioctx);
139 static void srpt_release_channel(struct scst_session *scst_sess);
140
141 static struct ib_client srpt_client = {
142         .name = DRV_NAME,
143         .add = srpt_add_one,
144         .remove = srpt_remove_one
145 };
146
147 /**
148  * Atomically test and set the channel state.
149  * @ch: RDMA channel.
150  * @old: channel state to compare with.
151  * @new: state to change the channel state to if the current state matches the
152  *       argument 'old'.
153  *
154  * Returns the previous channel state.
155  */
156 static enum rdma_ch_state
157 srpt_test_and_set_channel_state(struct srpt_rdma_ch *ch,
158                                 enum rdma_ch_state old,
159                                 enum rdma_ch_state new)
160 {
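        /*
         * atomic_cmpxchg() only modifies ch->state if it equals 'old', and it
         * always returns the value observed before the (attempted) exchange.
         */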
161         return atomic_cmpxchg(&ch->state, old, new);
162 }
163
164 /*
165  * Callback function called by the InfiniBand core when an asynchronous IB
166  * event occurs. This callback may occur in interrupt context. See also
167  * section 11.5.2, Set Asynchronous Event Handler in the InfiniBand
168  * Architecture Specification.
169  */
170 static void srpt_event_handler(struct ib_event_handler *handler,
171                                struct ib_event *event)
172 {
173         struct srpt_device *sdev;
174         struct srpt_port *sport;
175
176         TRACE_ENTRY();
177
178         sdev = ib_get_client_data(event->device, &srpt_client);
179         if (!sdev || sdev->device != event->device)
180                 return;
181
182         TRACE_DBG("ASYNC event= %d on device= %s",
183                   event->event, sdev->device->name);
184
185         switch (event->event) {
186         case IB_EVENT_PORT_ERR:
187                 if (event->element.port_num <= sdev->device->phys_port_cnt) {
188                         sport = &sdev->port[event->element.port_num - 1];
189                         sport->lid = 0;
190                         sport->sm_lid = 0;
191                 }
192                 break;
193         case IB_EVENT_PORT_ACTIVE:
194         case IB_EVENT_LID_CHANGE:
195         case IB_EVENT_PKEY_CHANGE:
196         case IB_EVENT_SM_CHANGE:
197         case IB_EVENT_CLIENT_REREGISTER:
198                 /*
199                  * Refresh port data asynchronously. Note: it is safe to call
200                  * schedule_work() even if &sport->work is already on the
201                  * global workqueue because schedule_work() tests for the
202                  * work_pending() condition before adding &sport->work to the
203                  * global work queue.
204                  */
205                 if (event->element.port_num <= sdev->device->phys_port_cnt) {
206                         sport = &sdev->port[event->element.port_num - 1];
207                         if (!sport->lid && !sport->sm_lid)
208                                 schedule_work(&sport->work);
209                 }
210                 break;
211         default:
212                 PRINT_ERROR("received unrecognized IB event %d", event->event);
213                 break;
214         }
215
216         TRACE_EXIT();
217 }
218
219 /*
220  * Callback function called by the InfiniBand core for SRQ (shared receive
221  * queue) events.
222  */
223 static void srpt_srq_event(struct ib_event *event, void *ctx)
224 {
225         TRACE_ENTRY();
226
227         TRACE_DBG("SRQ event %d", event->event);
228
229         TRACE_EXIT();
230 }
231
232 /*
233  * Callback function called by the InfiniBand core for QP (queue pair) events.
234  */
235 static void srpt_qp_event(struct ib_event *event, struct srpt_rdma_ch *ch)
236 {
237         TRACE_DBG("QP event %d on cm_id=%p sess_name=%s state=%d",
238                   event->event, ch->cm_id, ch->sess_name,
239                   atomic_read(&ch->state));
240
241         switch (event->event) {
242         case IB_EVENT_COMM_EST:
243 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20) || defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
244                 ib_cm_notify(ch->cm_id, event->event);
245 #else
246                 /* Vanilla 2.6.19 kernel (or before) without OFED. */
247                 PRINT_ERROR("%s", "ib_cm_notify() is not available on"
248                             " vanilla kernels before 2.6.20.");
249 #endif
250                 break;
251         case IB_EVENT_QP_LAST_WQE_REACHED:
252                 if (srpt_test_and_set_channel_state(ch, RDMA_CHANNEL_LIVE,
253                         RDMA_CHANNEL_DISCONNECTING) == RDMA_CHANNEL_LIVE) {
254                         PRINT_INFO("disconnected session %s.", ch->sess_name);
255                         ib_send_cm_dreq(ch->cm_id, NULL, 0);
256                 }
257                 break;
258         default:
259                 PRINT_ERROR("received unrecognized IB QP event %d",
260                             event->event);
261                 break;
262         }
263 }
264
265 /*
266  * Helper function for filling in an InfiniBand IOUnitInfo structure. Copies
267  * the lowest four bits of 'value' into element 'slot' of the array of
268  * four-bit elements 'c_list' (controller list). The index 'slot' is one-based.
269  *
270  * @pre 1 <= slot && 0 <= value && value < 16
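 *
 * For example, srpt_set_ioc(c_list, 1, 1) stores the value 1 in the upper
 * nibble of c_list[0], while srpt_set_ioc(c_list, 2, 0) clears the lower
 * nibble of c_list[0].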
271  */
272 static void srpt_set_ioc(u8 *c_list, u32 slot, u8 value)
273 {
274         u16 id;
275         u8 tmp;
276
277         id = (slot - 1) / 2;
278         if (slot & 0x1) {
279                 tmp = c_list[id] & 0xf;
280                 c_list[id] = (value << 4) | tmp;
281         } else {
282                 tmp = c_list[id] & 0xf0;
283                 c_list[id] = (value & 0xf) | tmp;
284         }
285 }
286
287 /*
288  * Write InfiniBand ClassPortInfo to mad. See also section 16.3.3.1
289  * ClassPortInfo in the InfiniBand Architecture Specification.
290  */
291 static void srpt_get_class_port_info(struct ib_dm_mad *mad)
292 {
293         struct ib_class_port_info *cif;
294
295         cif = (struct ib_class_port_info *)mad->data;
296         memset(cif, 0, sizeof *cif);
297         cif->base_version = 1;
298         cif->class_version = 1;
299         cif->resp_time_value = 20;
300
301         mad->mad_hdr.status = 0;
302 }
303
304 /*
305  * Write IOUnitInfo to mad. See also section 16.3.3.3 IOUnitInfo in the
306  * InfiniBand Architecture Specification. See also section B.7,
307  * table B.6 in the T10 SRP r16a document.
308  */
309 static void srpt_get_iou(struct ib_dm_mad *mad)
310 {
311         struct ib_dm_iou_info *ioui;
312         u8 slot;
313         int i;
314
315         ioui = (struct ib_dm_iou_info *)mad->data;
316         ioui->change_id = 1;
317         ioui->max_controllers = 16;
318
319         /* set present for slot 1 and empty for the rest */
320         srpt_set_ioc(ioui->controller_list, 1, 1);
321         for (i = 1, slot = 2; i < 16; i++, slot++)
322                 srpt_set_ioc(ioui->controller_list, slot, 0);
323
324         mad->mad_hdr.status = 0;
325 }
326
327 /*
328  * Write IOControllerProfile to mad for I/O controller (sdev, slot). See also
329  * section 16.3.3.4 IOControllerProfile in the InfiniBand Architecture
330  * Specification. See also section B.7, table B.7 in the T10 SRP r16a
331  * document.
332  */
333 static void srpt_get_ioc(struct srpt_device *sdev, u32 slot,
334                          struct ib_dm_mad *mad)
335 {
336         struct ib_dm_ioc_profile *iocp;
337
338         iocp = (struct ib_dm_ioc_profile *)mad->data;
339
340         if (!slot || slot > 16) {
341                 mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
342                 return;
343         }
344
345         if (slot > 2) {
346                 mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
347                 return;
348         }
349
350         memset(iocp, 0, sizeof *iocp);
351         strcpy(iocp->id_string, MELLANOX_SRPT_ID_STRING);
352         iocp->guid = cpu_to_be64(srpt_service_guid);
353         iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
354         iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id);
355         iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver);
356         iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
357         iocp->subsys_device_id = 0x0;
358         iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
359         iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
360         iocp->protocol = cpu_to_be16(SRP_PROTOCOL);
361         iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION);
362         iocp->send_queue_depth = cpu_to_be16(SRPT_SRQ_SIZE);
363         iocp->rdma_read_depth = 4;
364         iocp->send_size = cpu_to_be32(srp_max_message_size);
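        /* Clamp the advertised maximum RDMA transfer size to [256, 2^24] bytes. */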
365         iocp->rdma_size = cpu_to_be32(min(max(srp_max_rdma_size, 256U),
366                                           1U << 24));
367         iocp->num_svc_entries = 1;
368         iocp->op_cap_mask = SRP_SEND_TO_IOC | SRP_SEND_FROM_IOC |
369                 SRP_RDMA_READ_FROM_IOC | SRP_RDMA_WRITE_FROM_IOC;
370
371         mad->mad_hdr.status = 0;
372 }
373
374 /*
375  * Device management: write ServiceEntries to mad for the given slot. See also
376  * section 16.3.3.5 ServiceEntries in the InfiniBand Architecture
377  * Specification. See also section B.7, table B.8 in the T10 SRP r16a document.
378  */
379 static void srpt_get_svc_entries(u64 ioc_guid,
380                                  u16 slot, u8 hi, u8 lo, struct ib_dm_mad *mad)
381 {
382         struct ib_dm_svc_entries *svc_entries;
383
384         WARN_ON(!ioc_guid);
385
386         if (!slot || slot > 16) {
387                 mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
388                 return;
389         }
390
391         if (slot > 2 || lo > hi || hi > 1) {
392                 mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
393                 return;
394         }
395
396         svc_entries = (struct ib_dm_svc_entries *)mad->data;
397         memset(svc_entries, 0, sizeof *svc_entries);
398         svc_entries->service_entries[0].id = cpu_to_be64(ioc_guid);
399         snprintf(svc_entries->service_entries[0].name,
400                  sizeof(svc_entries->service_entries[0].name),
401                  "%s%016llx",
402                  SRP_SERVICE_NAME_PREFIX,
403                  (unsigned long long)ioc_guid);
404
405         mad->mad_hdr.status = 0;
406 }
407
408 /*
409  * Actual processing of a MAD (InfiniBand management datagram) *rq_mad that
410  * has been received through source port *sp. The response to be sent back
411  * is written to *rsp_mad.
412  */
413 static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
414                                  struct ib_dm_mad *rsp_mad)
415 {
416         u16 attr_id;
417         u32 slot;
418         u8 hi, lo;
419
420         attr_id = be16_to_cpu(rq_mad->mad_hdr.attr_id);
421         switch (attr_id) {
422         case DM_ATTR_CLASS_PORT_INFO:
423                 srpt_get_class_port_info(rsp_mad);
424                 break;
425         case DM_ATTR_IOU_INFO:
426                 srpt_get_iou(rsp_mad);
427                 break;
428         case DM_ATTR_IOC_PROFILE:
429                 slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
430                 srpt_get_ioc(sp->sdev, slot, rsp_mad);
431                 break;
432         case DM_ATTR_SVC_ENTRIES:
433                 slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
434                 hi = (u8) ((slot >> 8) & 0xff);
435                 lo = (u8) (slot & 0xff);
436                 slot = (u16) ((slot >> 16) & 0xffff);
437                 srpt_get_svc_entries(srpt_service_guid,
438                                      slot, hi, lo, rsp_mad);
439                 break;
440         default:
441                 rsp_mad->mad_hdr.status =
442                     cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
443                 break;
444         }
445 }
446
447 /*
448  * Callback function that is called by the InfiniBand core after transmission of
449  * a MAD. (MAD = management datagram; AH = address handle.)
450  */
451 static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
452                                   struct ib_mad_send_wc *mad_wc)
453 {
454         ib_destroy_ah(mad_wc->send_buf->ah);
455         ib_free_send_mad(mad_wc->send_buf);
456 }
457
458 /*
459  * Callback function that is called by the InfiniBand core after reception of
460  * a MAD (management datagram).
461  */
462 static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
463                                   struct ib_mad_recv_wc *mad_wc)
464 {
465         struct srpt_port *sport = (struct srpt_port *)mad_agent->context;
466         struct ib_ah *ah;
467         struct ib_mad_send_buf *rsp;
468         struct ib_dm_mad *dm_mad;
469
470         if (!mad_wc || !mad_wc->recv_buf.mad)
471                 return;
472
473         ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc,
474                                   mad_wc->recv_buf.grh, mad_agent->port_num);
475         if (IS_ERR(ah))
476                 goto err;
477
478         BUILD_BUG_ON(offsetof(struct ib_dm_mad, data) != IB_MGMT_DEVICE_HDR);
479
480         rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp,
481                                  mad_wc->wc->pkey_index, 0,
482                                  IB_MGMT_DEVICE_HDR, IB_MGMT_DEVICE_DATA,
483                                  GFP_KERNEL);
484         if (IS_ERR(rsp))
485                 goto err_rsp;
486
487         rsp->ah = ah;
488
489         dm_mad = rsp->mad;
490         memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof *dm_mad);
491         dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
492         dm_mad->mad_hdr.status = 0;
493
494         switch (mad_wc->recv_buf.mad->mad_hdr.method) {
495         case IB_MGMT_METHOD_GET:
496                 srpt_mgmt_method_get(sport, mad_wc->recv_buf.mad, dm_mad);
497                 break;
498         case IB_MGMT_METHOD_SET:
499                 dm_mad->mad_hdr.status =
500                     cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
501                 break;
502         default:
503                 dm_mad->mad_hdr.status =
504                     cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD);
505                 break;
506         }
507
508         if (!ib_post_send_mad(rsp, NULL)) {
509                 ib_free_recv_mad(mad_wc);
510                 /* will destroy_ah & free_send_mad in send completion */
511                 return;
512         }
513
514         ib_free_send_mad(rsp);
515
516 err_rsp:
517         ib_destroy_ah(ah);
518 err:
519         ib_free_recv_mad(mad_wc);
520 }
521
522 /*
523  * Enable InfiniBand management datagram processing, update the cached sm_lid,
524  * lid and gid values, and register a callback function for processing MADs
525  * on the specified port. It is safe to call this function more than once for
526  * the same port.
527  */
528 static int srpt_refresh_port(struct srpt_port *sport)
529 {
530         struct ib_mad_reg_req reg_req;
531         struct ib_port_modify port_modify;
532         struct ib_port_attr port_attr;
533         int ret;
534
535         TRACE_ENTRY();
536
537         memset(&port_modify, 0, sizeof port_modify);
538         port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
539         port_modify.clr_port_cap_mask = 0;
540
541         ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
542         if (ret)
543                 goto err_mod_port;
544
545         ret = ib_query_port(sport->sdev->device, sport->port, &port_attr);
546         if (ret)
547                 goto err_query_port;
548
549         sport->sm_lid = port_attr.sm_lid;
550         sport->lid = port_attr.lid;
551
552         ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid);
553         if (ret)
554                 goto err_query_port;
555
556         if (!sport->mad_agent) {
557                 memset(&reg_req, 0, sizeof reg_req);
558                 reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
559                 reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION;
560                 set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
561                 set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);
562
563                 sport->mad_agent = ib_register_mad_agent(sport->sdev->device,
564                                                          sport->port,
565                                                          IB_QPT_GSI,
566                                                          &reg_req, 0,
567                                                          srpt_mad_send_handler,
568                                                          srpt_mad_recv_handler,
569                                                          sport);
570                 if (IS_ERR(sport->mad_agent)) {
571                         ret = PTR_ERR(sport->mad_agent);
572                         sport->mad_agent = NULL;
573                         goto err_query_port;
574                 }
575         }
576
577         TRACE_EXIT_RES(0);
578
579         return 0;
580
581 err_query_port:
582
583         port_modify.set_port_cap_mask = 0;
584         port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
585         ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
586
587 err_mod_port:
588
589         TRACE_EXIT_RES(ret);
590
591         return ret;
592 }
593
594 /*
595  * Unregister the callback function for processing MADs and disable MAD
596  * processing for all ports of the specified device. It is safe to call this
597  * function more than once for the same device.
598  */
599 static void srpt_unregister_mad_agent(struct srpt_device *sdev)
600 {
601         struct ib_port_modify port_modify = {
602                 .clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP,
603         };
604         struct srpt_port *sport;
605         int i;
606
607         for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
608                 sport = &sdev->port[i - 1];
609                 WARN_ON(sport->port != i);
610                 if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0)
611                         PRINT_ERROR("%s", "disabling MAD processing failed.");
612                 if (sport->mad_agent) {
613                         ib_unregister_mad_agent(sport->mad_agent);
614                         sport->mad_agent = NULL;
615                 }
616         }
617 }
618
619 /**
620  * Allocate and initialize an SRPT I/O context structure.
621  */
622 static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev)
623 {
624         struct srpt_ioctx *ioctx;
625
626         ioctx = kmalloc(sizeof *ioctx, GFP_KERNEL);
627         if (!ioctx)
628                 goto out;
629
630         ioctx->buf = kzalloc(srp_max_message_size, GFP_KERNEL);
631         if (!ioctx->buf)
632                 goto out_free_ioctx;
633
634         ioctx->dma = ib_dma_map_single(sdev->device, ioctx->buf,
635                                        srp_max_message_size, DMA_BIDIRECTIONAL);
636         if (ib_dma_mapping_error(sdev->device, ioctx->dma))
637                 goto out_free_buf;
638
639         return ioctx;
640
641 out_free_buf:
642         kfree(ioctx->buf);
643 out_free_ioctx:
644         kfree(ioctx);
645 out:
646         return NULL;
647 }
648
649 /*
650  * Deallocate an SRPT I/O context structure.
651  */
652 static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
653 {
654         if (!ioctx)
655                 return;
656
657         ib_dma_unmap_single(sdev->device, ioctx->dma,
658                             srp_max_message_size, DMA_BIDIRECTIONAL);
659         kfree(ioctx->buf);
660         kfree(ioctx);
661 }
662
663 /*
664  * Associate a ring of SRPT I/O context structures with the specified device.
665  */
666 static int srpt_alloc_ioctx_ring(struct srpt_device *sdev)
667 {
668         int i;
669
670         TRACE_ENTRY();
671
672         for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
673                 sdev->ioctx_ring[i] = srpt_alloc_ioctx(sdev);
674
675                 if (!sdev->ioctx_ring[i])
676                         goto err;
677
678                 sdev->ioctx_ring[i]->index = i;
679         }
680
681         TRACE_EXIT_RES(0);
682
683         return 0;
684
685 err:
686         while (--i >= 0) {
687                 srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
688                 sdev->ioctx_ring[i] = NULL;
689         }
690         TRACE_EXIT_RES(-ENOMEM);
691         return -ENOMEM;
692 }
693
694 /* Free the ring of SRPT I/O context structures. */
695 static void srpt_free_ioctx_ring(struct srpt_device *sdev)
696 {
697         int i;
698
699         for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
700                 srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
701                 sdev->ioctx_ring[i] = NULL;
702         }
703 }
704
705 /**
706  * Set the state of a command.
707  * @new: New state to be set.
708  *
709  * Does not modify the state of aborted commands. Returns the previous command
710  * state.
711  */
712 static enum srpt_command_state srpt_set_cmd_state(struct srpt_ioctx *ioctx,
713                                                   enum srpt_command_state new)
714 {
715         enum srpt_command_state previous;
716
717         WARN_ON(!ioctx);
718         WARN_ON(new == SRPT_STATE_NEW);
719
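        /*
         * Retry the compare-and-exchange until it either succeeds or until
         * another thread has set the state to SRPT_STATE_ABORTED.
         */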
720         do {
721                 previous = atomic_read(&ioctx->state);
722         } while (previous != SRPT_STATE_ABORTED
723                && atomic_cmpxchg(&ioctx->state, previous, new) != previous);
724
725         return previous;
726 }
727
728 /**
729  * Test and set the state of a command.
730  * @old: State to compare against.
731  * @new: New state to be set if the current state matches 'old'.
732  *
733  * Returns the previous command state.
734  */
735 static enum srpt_command_state
736 srpt_test_and_set_cmd_state(struct srpt_ioctx *ioctx,
737                             enum srpt_command_state old,
738                             enum srpt_command_state new)
739 {
740         WARN_ON(!ioctx);
741         WARN_ON(old == SRPT_STATE_ABORTED);
742         WARN_ON(new == SRPT_STATE_NEW);
743
744         return atomic_cmpxchg(&ioctx->state, old, new);
745 }
746
747 /**
748  * Post a receive request on the work queue of InfiniBand device 'sdev'.
749  */
750 static int srpt_post_recv(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
751 {
752         struct ib_sge list;
753         struct ib_recv_wr wr, *bad_wr;
754
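        /*
         * Tag the work request ID with SRPT_OP_RECV so that the completion
         * handler can tell receive completions apart from send and RDMA
         * completions.
         */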
755         wr.wr_id = ioctx->index | SRPT_OP_RECV;
756
757         list.addr = ioctx->dma;
758         list.length = srp_max_message_size;
759         list.lkey = sdev->mr->lkey;
760
761         wr.next = NULL;
762         wr.sg_list = &list;
763         wr.num_sge = 1;
764
765         return ib_post_srq_recv(sdev->srq, &wr, &bad_wr);
766 }
767
768 /**
769  * Post an IB send request.
770  * @ch: RDMA channel to post the send request on.
771  * @ioctx: I/O context of the send request.
772  * @len: length of the request to be sent in bytes.
773  *
774  * Returns zero upon success and a non-zero value upon failure.
775  */
776 static int srpt_post_send(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
777                           int len)
778 {
779         struct ib_sge list;
780         struct ib_send_wr wr, *bad_wr;
781         struct srpt_device *sdev = ch->sport->sdev;
782         int ret;
783
784         ret = -ENOMEM;
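        /*
         * Reserve one send work request slot: ch->qp_wr_avail tracks how many
         * work requests the send queue can still accept. The slot is released
         * again at 'out' if posting the send request fails.
         */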
785         if (atomic_dec_return(&ch->qp_wr_avail) < 0) {
786                 PRINT_ERROR("%s[%d]: send queue full", __func__, __LINE__);
787                 goto out;
788         }
789
790         ib_dma_sync_single_for_device(sdev->device, ioctx->dma,
791                                       len, DMA_TO_DEVICE);
792
793         list.addr = ioctx->dma;
794         list.length = len;
795         list.lkey = sdev->mr->lkey;
796
797         wr.next = NULL;
798         wr.wr_id = ioctx->index;
799         wr.sg_list = &list;
800         wr.num_sge = 1;
801         wr.opcode = IB_WR_SEND;
802         wr.send_flags = IB_SEND_SIGNALED;
803
804         ret = ib_post_send(ch->qp, &wr, &bad_wr);
805
806 out:
807         if (ret < 0)
808                 atomic_inc(&ch->qp_wr_avail);
809         return ret;
810 }
811
812 /**
813  * srpt_get_desc_tbl() - Parse the data descriptors of an SRP_CMD request.
814  * @ioctx: Pointer to the I/O context associated with the request.
815  * @srp_cmd: Pointer to the SRP_CMD request data.
816  * @dir: Pointer to the variable to which the transfer direction will be
817  *   written.
818  * @data_len: Pointer to the variable to which the total data length of all
819  *   descriptors in the SRP_CMD request will be written.
820  *
821  * This function initializes ioctx->nrbuf and ioctx->r_bufs.
822  *
823  * Returns -EINVAL when the SRP_CMD request contains inconsistent descriptors;
824  * -ENOMEM when memory allocation fails and zero upon success.
825  */
826 static int srpt_get_desc_tbl(struct srpt_ioctx *ioctx, struct srp_cmd *srp_cmd,
827                              scst_data_direction *dir, u64 *data_len)
828 {
829         struct srp_indirect_buf *idb;
830         struct srp_direct_buf *db;
831         unsigned add_cdb_offset;
832         int ret;
833
834         /*
835          * The pointer computations below will only be compiled correctly
836          * if srp_cmd::add_data is declared as s8*, u8*, s8[] or u8[], so check
837          * whether srp_cmd::add_data has been declared as a byte pointer.
838          */
839 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 31)
840         BUILD_BUG_ON(!__same_type(srp_cmd->add_data[0], (s8)0)
841                      && !__same_type(srp_cmd->add_data[0], (u8)0));
842 #else
843         /* Note: the __same_type() macro has been introduced in kernel 2.6.31.*/
844 #endif
845
846         BUG_ON(!dir);
847         BUG_ON(!data_len);
848
849         ret = 0;
850         *data_len = 0;
851
852         /*
853          * The lower four bits of the buffer format field contain the DATA-IN
854          * buffer descriptor format, and the highest four bits contain the
855          * DATA-OUT buffer descriptor format.
856          */
857         *dir = SCST_DATA_NONE;
858         if (srp_cmd->buf_fmt & 0xf)
859                 /* DATA-IN: transfer data from target to initiator. */
860                 *dir = SCST_DATA_READ;
861         else if (srp_cmd->buf_fmt >> 4)
862                 /* DATA-OUT: transfer data from initiator to target. */
863                 *dir = SCST_DATA_WRITE;
864
865         /*
866          * According to the SRP spec, the lower two bits of the 'ADDITIONAL
867          * CDB LENGTH' field are reserved and the size in bytes of this field
868          * is four times the value specified in bits 3..7. Hence the "& ~3".
869          */
870         add_cdb_offset = srp_cmd->add_cdb_len & ~3;
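        /*
         * The data buffer descriptors follow the additional CDB bytes in
         * srp_cmd->add_data, hence the add_cdb_offset computed above.
         */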
871         if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) ||
872             ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) {
873                 ioctx->n_rbuf = 1;
874                 ioctx->rbufs = &ioctx->single_rbuf;
875
876                 db = (struct srp_direct_buf *)(srp_cmd->add_data
877                                                + add_cdb_offset);
878                 memcpy(ioctx->rbufs, db, sizeof *db);
879                 *data_len = be32_to_cpu(db->len);
880         } else if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_INDIRECT) ||
881                    ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_INDIRECT)) {
882                 idb = (struct srp_indirect_buf *)(srp_cmd->add_data
883                                                   + add_cdb_offset);
884
885                 ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof *db;
886
887                 if (ioctx->n_rbuf >
888                     (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
889                         PRINT_ERROR("received corrupt SRP_CMD request"
890                                     " (%u out + %u in != %u / %zu)",
891                                     srp_cmd->data_out_desc_cnt,
892                                     srp_cmd->data_in_desc_cnt,
893                                     be32_to_cpu(idb->table_desc.len),
894                                     sizeof(*db));
895                         ioctx->n_rbuf = 0;
896                         ret = -EINVAL;
897                         goto out;
898                 }
899
900                 if (ioctx->n_rbuf == 1)
901                         ioctx->rbufs = &ioctx->single_rbuf;
902                 else {
903                         ioctx->rbufs =
904                                 kmalloc(ioctx->n_rbuf * sizeof *db, GFP_ATOMIC);
905                         if (!ioctx->rbufs) {
906                                 ioctx->n_rbuf = 0;
907                                 ret = -ENOMEM;
908                                 goto out;
909                         }
910                 }
911
912                 db = idb->desc_list;
913                 memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof *db);
914                 *data_len = be32_to_cpu(idb->len);
915         }
916 out:
917         return ret;
918 }
919
920 /*
921  * Modify the attributes of queue pair 'qp': allow local write, remote read,
922  * and remote write. Also transition 'qp' to state IB_QPS_INIT.
923  */
924 static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
925 {
926         struct ib_qp_attr *attr;
927         int ret;
928
929         attr = kzalloc(sizeof *attr, GFP_KERNEL);
930         if (!attr)
931                 return -ENOMEM;
932
933         attr->qp_state = IB_QPS_INIT;
934         attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
935             IB_ACCESS_REMOTE_WRITE;
936         attr->port_num = ch->sport->port;
937         attr->pkey_index = 0;
938
939         ret = ib_modify_qp(qp, attr,
940                            IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PORT |
941                            IB_QP_PKEY_INDEX);
942
943         kfree(attr);
944         return ret;
945 }
946
947 /**
948  * Change the state of a channel to 'ready to receive' (RTR).
949  * @ch: channel of the queue pair.
950  * @qp: queue pair to change the state of.
951  *
952  * Returns zero upon success and a negative value upon failure.
953  *
954  * Note: currently a struct ib_qp_attr takes 136 bytes on a 64-bit system.
955  * If this structure ever becomes larger, it might be necessary to allocate
956  * it dynamically instead of on the stack.
957  */
958 static int srpt_ch_qp_rtr(struct srpt_rdma_ch *ch, struct ib_qp *qp)
959 {
960         struct ib_qp_attr qp_attr;
961         int attr_mask;
962         int ret;
963
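        /*
         * ib_cm_init_qp_attr() fills in qp_attr and attr_mask with the
         * attributes needed for the transition to the RTR state, based on the
         * connection state maintained by the IB CM.
         */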
964         qp_attr.qp_state = IB_QPS_RTR;
965         ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask);
966         if (ret)
967                 goto out;
968
969         qp_attr.max_dest_rd_atomic = 4;
970
971         ret = ib_modify_qp(qp, &qp_attr, attr_mask);
972
973 out:
974         return ret;
975 }
976
977 /**
978  * Change the state of a channel to 'ready to send' (RTS).
979  * @ch: channel of the queue pair.
980  * @qp: queue pair to change the state of.
981  *
982  * Returns zero upon success and a negative value upon failure.
983  *
984  * Note: currently a struct ib_qp_attr takes 136 bytes on a 64-bit system.
985  * If this structure ever becomes larger, it might be necessary to allocate
986  * it dynamically instead of on the stack.
987  */
988 static int srpt_ch_qp_rts(struct srpt_rdma_ch *ch, struct ib_qp *qp)
989 {
990         struct ib_qp_attr qp_attr;
991         int attr_mask;
992         int ret;
993
994         qp_attr.qp_state = IB_QPS_RTS;
995         ret = ib_cm_init_qp_attr(ch->cm_id, &qp_attr, &attr_mask);
996         if (ret)
997                 goto out;
998
999         qp_attr.max_rd_atomic = 4;
1000
1001         ret = ib_modify_qp(qp, &qp_attr, attr_mask);
1002
1003 out:
1004         return ret;
1005 }
1006
1007 static void srpt_reset_ioctx(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
1008 {
1009         WARN_ON(!ch);
1010         if (!ch)
1011                 return;
1012
1013         srpt_unmap_sg_to_ib_sge(ch, ioctx);
1014
1015         if (ioctx->n_rbuf > 1) {
1016                 kfree(ioctx->rbufs);
1017                 ioctx->rbufs = NULL;
1018         }
1019
1020         if (srpt_post_recv(ch->sport->sdev, ioctx))
1021                 /* The ioctx should be queued back onto a free list here. */
1022                 PRINT_ERROR("%s", "SRQ post_recv failed - this is serious.");
1023         else
1024                 atomic_inc(&ch->req_lim_delta);
1025 }
1026
1027 /**
1028  * Abort a command.
1029  */
1030 static void srpt_abort_scst_cmd(struct srpt_device *sdev,
1031                                 struct scst_cmd *scmnd)
1032 {
1033         struct srpt_ioctx *ioctx;
1034         scst_data_direction dir;
1035         enum srpt_command_state previous_state;
1036
1037         TRACE_ENTRY();
1038
1039         ioctx = scst_cmd_get_tgt_priv(scmnd);
1040         BUG_ON(!ioctx);
1041
1042         previous_state = srpt_set_cmd_state(ioctx, SRPT_STATE_ABORTED);
1043         if (previous_state == SRPT_STATE_ABORTED)
1044                 goto out;
1045
1046         TRACE_DBG("Aborting cmd with state %d and tag %lld",
1047                   previous_state, scst_cmd_get_tag(scmnd));
1048
1049         dir = scst_cmd_get_data_direction(scmnd);
1050         if (dir != SCST_DATA_NONE && scst_cmd_get_sg(scmnd))
1051                 ib_dma_unmap_sg(sdev->device,
1052                                 scst_cmd_get_sg(scmnd),
1053                                 scst_cmd_get_sg_cnt(scmnd),
1054                                 scst_to_tgt_dma_dir(dir));
1055
1056         switch (previous_state) {
1057         case SRPT_STATE_NEW:
1058                 break;
1059         case SRPT_STATE_NEED_DATA:
1060                 WARN_ON(scst_cmd_get_data_direction(ioctx->scmnd)
1061                         == SCST_DATA_READ);
1062                 scst_rx_data(scmnd,
1063                              SCST_RX_STATUS_ERROR,
1064                              SCST_CONTEXT_THREAD);
1065                 break;
1066         case SRPT_STATE_DATA_IN:
1067         case SRPT_STATE_PROCESSED:
1068                 scst_set_delivery_status(scmnd, SCST_CMD_DELIVERY_ABORTED);
1069                 WARN_ON(scmnd->state != SCST_CMD_STATE_XMIT_WAIT);
1070                 scst_tgt_cmd_done(scmnd, scst_estimate_context());
1071                 break;
1072         default:
1073                 TRACE_DBG("Aborting cmd with state %d", previous_state);
1074                 WARN_ON("ERROR: unexpected command state");
1075         }
1076
1077 out:
1078         ;
1079
1080         TRACE_EXIT();
1081 }
1082
1083 static void srpt_handle_err_comp(struct srpt_rdma_ch *ch, struct ib_wc *wc)
1084 {
1085         struct srpt_ioctx *ioctx;
1086         struct srpt_device *sdev = ch->sport->sdev;
1087
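        /*
         * The SRPT_OP_RECV bit has been set in the work request ID by
         * srpt_post_recv() and hence identifies a failed receive completion.
         */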
1088         if (wc->wr_id & SRPT_OP_RECV) {
1089                 ioctx = sdev->ioctx_ring[wc->wr_id & ~SRPT_OP_RECV];
1090                 PRINT_ERROR("%s", "This is serious - SRQ is in bad state.");
1091         } else {
1092                 ioctx = sdev->ioctx_ring[wc->wr_id];
1093
1094                 if (ioctx->scmnd)
1095                         srpt_abort_scst_cmd(sdev, ioctx->scmnd);
1096                 else
1097                         srpt_reset_ioctx(ch, ioctx);
1098         }
1099 }
1100
1101 /** Process an IB send completion notification. */
1102 static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
1103                                   struct srpt_ioctx *ioctx,
1104                                   enum scst_exec_context context)
1105 {
1106         if (ioctx->scmnd) {
1107                 scst_data_direction dir =
1108                         scst_cmd_get_data_direction(ioctx->scmnd);
1109
1110                 if (dir != SCST_DATA_NONE && scst_cmd_get_sg(ioctx->scmnd))
1111                         ib_dma_unmap_sg(ch->sport->sdev->device,
1112                                         scst_cmd_get_sg(ioctx->scmnd),
1113                                         scst_cmd_get_sg_cnt(ioctx->scmnd),
1114                                         scst_to_tgt_dma_dir(dir));
1115
1116                 WARN_ON(ioctx->scmnd->state != SCST_CMD_STATE_XMIT_WAIT);
1117                 scst_tgt_cmd_done(ioctx->scmnd, context);
1118         } else
1119                 srpt_reset_ioctx(ch, ioctx);
1120 }
1121
1122 /** Process an IB RDMA completion notification. */
1123 static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
1124                                   struct srpt_ioctx *ioctx)
1125 {
1126         if (!ioctx->scmnd) {
1127                 WARN_ON("ERROR: ioctx->scmnd == NULL");
1128                 srpt_reset_ioctx(ch, ioctx);
1129                 return;
1130         }
1131
1132         /*
1133          * If an RDMA completion notification has been received for a write
1134          * command, tell SCST that processing can continue by calling
1135          * scst_rx_data().
1136          */
1137         if (srpt_test_and_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA,
1138                                 SRPT_STATE_DATA_IN) == SRPT_STATE_NEED_DATA) {
1139                 WARN_ON(scst_cmd_get_data_direction(ioctx->scmnd)
1140                         == SCST_DATA_READ);
1141                 scst_rx_data(ioctx->scmnd, SCST_RX_STATUS_SUCCESS,
1142                              scst_estimate_context());
1143         }
1144 }
1145
1146 /**
1147  * srpt_build_cmd_rsp() - Build an SRP_RSP response.
1148  * @ch: RDMA channel through which the request has been received.
1149  * @ioctx: I/O context associated with the SRP_CMD request. The response will
1150  *   be built in the buffer ioctx->buf points at and hence this function will
1151  *   overwrite the request data.
1152  * @tag: tag of the request for which this response is being generated.
1153  * @status: value for the STATUS field of the SRP_RSP information unit.
1154  * @sense_data: pointer to sense data to be included in the response.
1155  * @sense_data_len: length in bytes of the sense data.
1156  *
1157  * Returns the size in bytes of the SRP_RSP response.
1158  *
1159  * An SRP_RSP response contains a SCSI status or service response. See also
1160  * section 6.9 in the T10 SRP r16a document for the format of an SRP_RSP
1161  * response. See also SPC-2 for more information about sense data.
1162  */
1163 static int srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
1164                               struct srpt_ioctx *ioctx, u64 tag, int status,
1165                               const u8 *sense_data, int sense_data_len)
1166 {
1167         struct srp_rsp *srp_rsp;
1168         int limit_delta;
1169         int max_sense_len;
1170
1171         /*
1172          * The lowest bit of all SAM-3 status codes is zero (see also
1173          * paragraph 5.3 in SAM-3).
1174          */
1175         WARN_ON(status & 1);
1176
1177         srp_rsp = ioctx->buf;
1178         BUG_ON(!srp_rsp);
1179         memset(srp_rsp, 0, sizeof *srp_rsp);
1180
1181         limit_delta = atomic_read(&ch->req_lim_delta);
1182         atomic_sub(limit_delta, &ch->req_lim_delta);
1183
1184         srp_rsp->opcode = SRP_RSP;
1185         /*
1186          * Copy the SCSOLNT or UCSOLNT bit from the request to the SOLNT bit
1187          * of the response.
1188          */
1189         srp_rsp->sol_not
1190                 = (ioctx->sol_not
1191                    & (status == SAM_STAT_GOOD ? SRP_SCSOLNT : SRP_UCSOLNT))
1192                 ? SRP_SOLNT : 0;
1193         srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
1194         srp_rsp->tag = tag;
1195
1196         if (SCST_SENSE_VALID(sense_data)) {
1197                 BUILD_BUG_ON(MIN_MAX_MESSAGE_SIZE <= sizeof(*srp_rsp));
1198                 max_sense_len = ch->max_ti_iu_len - sizeof(*srp_rsp);
1199                 if (sense_data_len > max_sense_len) {
1200                         PRINT_WARNING("truncated sense data from %d to %d"
1201                                 " bytes", sense_data_len,
1202                                 max_sense_len);
1203                         sense_data_len = max_sense_len;
1204                 }
1205
1206                 srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
1207                 srp_rsp->status = status;
1208                 srp_rsp->sense_data_len = cpu_to_be32(sense_data_len);
1209                 memcpy(srp_rsp + 1, sense_data, sense_data_len);
1210         } else
1211                 sense_data_len = 0;
1212
1213         return sizeof(*srp_rsp) + sense_data_len;
1214 }
1215
1216 /**
1217  * Build a task management response, which is a specific SRP_RSP response.
1218  * @ch: RDMA channel through which the request has been received.
1219  * @ioctx: I/O context in which the SRP_RSP response will be built.
1220  * @rsp_code: RSP_CODE that will be stored in the response.
1221  * @tag: tag of the request for which this response is being generated.
1222  *
1223  * Returns the size in bytes of the SRP_RSP response.
1224  *
1225  * An SRP_RSP response contains a SCSI status or service response. See also
1226  * section 6.9 in the T10 SRP r16a document for the format of an SRP_RSP
1227  * response.
1228  */
1229 static int srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
1230                                   struct srpt_ioctx *ioctx, u8 rsp_code,
1231                                   u64 tag)
1232 {
1233         struct srp_rsp *srp_rsp;
1234         int limit_delta;
1235         int resp_data_len;
1236         int resp_len;
1237
1238         resp_data_len = (rsp_code == SRP_TSK_MGMT_SUCCESS) ? 0 : 4;
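        /*
         * Upon failure a four-byte RESPONSE DATA field is appended to the
         * SRP_RSP; its last byte carries the RSP_CODE (see also section 6.9
         * in the T10 SRP r16a document).
         */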
1239         resp_len = sizeof(*srp_rsp) + resp_data_len;
1240
1241         srp_rsp = ioctx->buf;
1242         memset(srp_rsp, 0, sizeof *srp_rsp);
1243
1244         limit_delta = atomic_read(&ch->req_lim_delta);
1245         atomic_sub(limit_delta, &ch->req_lim_delta);
1246
1247         srp_rsp->opcode = SRP_RSP;
1248         /*
1249          * Copy the SCSOLNT or UCSOLNT bit from the request to the SOLNT bit
1250          * of the response.
1251          */
1252         srp_rsp->sol_not
1253                 = (ioctx->sol_not
1254                    & (rsp_code == SRP_TSK_MGMT_SUCCESS
1255                       ? SRP_SCSOLNT : SRP_UCSOLNT))
1256                 ? SRP_SOLNT : 0;
1257         srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
1258         srp_rsp->tag = tag;
1259
1260         if (rsp_code != SRP_TSK_MGMT_SUCCESS) {
1261                 srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID;
1262                 srp_rsp->resp_data_len = cpu_to_be32(resp_data_len);
1263                 srp_rsp->data[3] = rsp_code;
1264         }
1265
1266         return resp_len;
1267 }
1268
1269 /*
1270  * Process SRP_CMD.
1271  */
1272 static int srpt_handle_cmd(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
1273 {
1274         struct scst_cmd *scmnd;
1275         struct srp_cmd *srp_cmd;
1276         scst_data_direction dir;
1277         u64 data_len;
1278         int ret;
1279
1280         srp_cmd = ioctx->buf;
1281
1282         scmnd = scst_rx_cmd(ch->scst_sess, (u8 *) &srp_cmd->lun,
1283                             sizeof srp_cmd->lun, srp_cmd->cdb, 16,
1284                             thread ? SCST_NON_ATOMIC : SCST_ATOMIC);
1285         if (!scmnd)
1286                 goto err;
1287
1288         ioctx->scmnd = scmnd;
1289
1290         ret = srpt_get_desc_tbl(ioctx, srp_cmd, &dir, &data_len);
1291         if (ret) {
1292                 scst_set_cmd_error(scmnd,
1293                         SCST_LOAD_SENSE(scst_sense_invalid_field_in_cdb));
1294                 goto err;
1295         }
1296
1297         switch (srp_cmd->task_attr) {
1298         case SRP_CMD_HEAD_OF_Q:
1299                 scmnd->queue_type = SCST_CMD_QUEUE_HEAD_OF_QUEUE;
1300                 break;
1301         case SRP_CMD_ORDERED_Q:
1302                 scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
1303                 break;
1304         case SRP_CMD_SIMPLE_Q:
1305                 scmnd->queue_type = SCST_CMD_QUEUE_SIMPLE;
1306                 break;
1307         case SRP_CMD_ACA:
1308                 scmnd->queue_type = SCST_CMD_QUEUE_ACA;
1309                 break;
1310         default:
1311                 scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
1312                 break;
1313         }
1314
1315         scst_cmd_set_tag(scmnd, srp_cmd->tag);
1316         scst_cmd_set_tgt_priv(scmnd, ioctx);
1317         scst_cmd_set_expected(scmnd, dir, data_len);
1318         scst_cmd_init_done(scmnd, scst_estimate_context());
1319
1320         return 0;
1321
1322 err:
1323         return -1;
1324 }
1325
1326 /*
1327  * srpt_handle_tsk_mgmt() - Process an SRP_TSK_MGMT information unit.
1328  *
1329  * Returns SRP_TSK_MGMT_SUCCESS upon success.
1330  *
1331  * Each task management function is performed by calling one of the
1332  * scst_rx_mgmt_fn*() functions. These functions will either report failure
1333  * or process the task management function asynchronously. The function
1334  * srpt_tsk_mgmt_done() will be called by the SCST core upon completion of the
1335  * task management function. When srpt_handle_tsk_mgmt() reports failure
1336  * (i.e. returns -1) a response will have been built in ioctx->buf. This
1337  * information unit has to be sent back by the caller.
1338  *
1339  * For more information about SRP_TSK_MGMT information units, see also section
1340  * 6.7 in the T10 SRP r16a document.
1341  */
1342 static u8 srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
1343                                struct srpt_ioctx *ioctx)
1344 {
1345         struct srp_tsk_mgmt *srp_tsk;
1346         struct srpt_mgmt_ioctx *mgmt_ioctx;
1347         int ret;
1348         u8 srp_tsk_mgmt_status;
1349
1350         srp_tsk = ioctx->buf;
1351
1352         TRACE_DBG("recv_tsk_mgmt= %d for task_tag= %lld"
1353                   " using tag= %lld cm_id= %p sess= %p",
1354                   srp_tsk->tsk_mgmt_func,
1355                   (unsigned long long) srp_tsk->task_tag,
1356                   (unsigned long long) srp_tsk->tag,
1357                   ch->cm_id, ch->scst_sess);
1358
1359         srp_tsk_mgmt_status = SRP_TSK_MGMT_FAILED;
1360         mgmt_ioctx = kmalloc(sizeof *mgmt_ioctx, GFP_ATOMIC);
1361         if (!mgmt_ioctx)
1362                 goto err;
1363
1364         mgmt_ioctx->ioctx = ioctx;
1365         mgmt_ioctx->ch = ch;
1366         mgmt_ioctx->tag = srp_tsk->tag;
1367
1368         switch (srp_tsk->tsk_mgmt_func) {
1369         case SRP_TSK_ABORT_TASK:
1370                 TRACE_DBG("%s", "Processing SRP_TSK_ABORT_TASK");
1371                 ret = scst_rx_mgmt_fn_tag(ch->scst_sess,
1372                                           SCST_ABORT_TASK,
1373                                           srp_tsk->task_tag,
1374                                           thread ?
1375                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1376                                           mgmt_ioctx);
1377                 break;
1378         case SRP_TSK_ABORT_TASK_SET:
1379                 TRACE_DBG("%s", "Processing SRP_TSK_ABORT_TASK_SET");
1380                 ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
1381                                           SCST_ABORT_TASK_SET,
1382                                           (u8 *) &srp_tsk->lun,
1383                                           sizeof srp_tsk->lun,
1384                                           thread ?
1385                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1386                                           mgmt_ioctx);
1387                 break;
1388         case SRP_TSK_CLEAR_TASK_SET:
1389                 TRACE_DBG("%s", "Processing SRP_TSK_CLEAR_TASK_SET");
1390                 ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
1391                                           SCST_CLEAR_TASK_SET,
1392                                           (u8 *) &srp_tsk->lun,
1393                                           sizeof srp_tsk->lun,
1394                                           thread ?
1395                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1396                                           mgmt_ioctx);
1397                 break;
1398         case SRP_TSK_LUN_RESET:
1399                 TRACE_DBG("%s", "Processing SRP_TSK_LUN_RESET");
1400                 ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
1401                                           SCST_LUN_RESET,
1402                                           (u8 *) &srp_tsk->lun,
1403                                           sizeof srp_tsk->lun,
1404                                           thread ?
1405                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1406                                           mgmt_ioctx);
1407                 break;
1408         case SRP_TSK_CLEAR_ACA:
1409                 TRACE_DBG("%s", "Processing SRP_TSK_CLEAR_ACA");
1410                 ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
1411                                           SCST_CLEAR_ACA,
1412                                           (u8 *) &srp_tsk->lun,
1413                                           sizeof srp_tsk->lun,
1414                                           thread ?
1415                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1416                                           mgmt_ioctx);
1417                 break;
1418         default:
1419                 TRACE_DBG("%s", "Unsupported task management function.");
1420                 srp_tsk_mgmt_status = SRP_TSK_MGMT_FUNC_NOT_SUPP;
1421                 goto err;
1422         }
1423
1424         if (ret) {
1425                 TRACE_DBG("Processing task management function failed"
1426                           " (ret = %d).", ret);
1427                 goto err;
1428         }
1429         return SRP_TSK_MGMT_SUCCESS;
1430
1431 err:
1432         kfree(mgmt_ioctx);
1433         return srp_tsk_mgmt_status;
1434 }
1435
1436 /**
1437  * Process a newly received information unit.
1438  * @ch: RDMA channel through which the information unit has been received.
1439  * @ioctx: SRPT I/O context associated with the information unit.
1440  */
1441 static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
1442                                struct srpt_ioctx *ioctx)
1443 {
1444         struct srp_cmd *srp_cmd;
1445         enum rdma_ch_state ch_state;
1446         u8 srp_response_status;
1447         u8 srp_tsk_mgmt_status;
1448         int len;
1449
1450         /*
1451          * A quote from SAM-3, paragraph 4.9.6: "Any command that is not
1452          * relayed to a dependent logical unit shall be terminated with a
1453          * CHECK CONDITION status. The sense key shall be set to ILLEGAL
1454          * REQUEST and the additional sense code shall be set to INVALID
1455          * COMMAND OPERATION CODE. If a task management function cannot be
1456          * relayed to a dependent logical unit, a service response of SERVICE
1457          * DELIVERY OR TARGET FAILURE shall be returned."
1458          */
1459
1460         srp_response_status = SAM_STAT_BUSY;
1461         /* To keep the compiler happy. */
1462         srp_tsk_mgmt_status = -1;
1463
1464         ch_state = atomic_read(&ch->state);
1465         if (ch_state == RDMA_CHANNEL_CONNECTING) {
1466                 list_add_tail(&ioctx->wait_list, &ch->cmd_wait_list);
1467                 return;
1468         } else if (ch_state == RDMA_CHANNEL_DISCONNECTING) {
1469                 srpt_reset_ioctx(ch, ioctx);
1470                 return;
1471         }
1472
1473         WARN_ON(ch_state != RDMA_CHANNEL_LIVE);
1474
1475         ib_dma_sync_single_for_cpu(ch->sport->sdev->device,
1476                                    ioctx->dma, srp_max_message_size,
1477                                    DMA_FROM_DEVICE);
1478
1479         srp_cmd = ioctx->buf;
1480
1481         ioctx->n_rbuf = 0;
1482         ioctx->rbufs = NULL;
1483         ioctx->n_rdma = 0;
1484         ioctx->n_rdma_ius = 0;
1485         ioctx->rdma_ius = NULL;
1486         ioctx->scmnd = NULL;
1487         ioctx->ch = ch;
1488         ioctx->sol_not = srp_cmd->sol_not;
1489         atomic_set(&ioctx->state, SRPT_STATE_NEW);
1490
1491         switch (srp_cmd->opcode) {
1492         case SRP_CMD:
1493                 if (srpt_handle_cmd(ch, ioctx) < 0) {
1494                         if (ioctx->scmnd)
1495                                 srp_response_status =
1496                                         scst_cmd_get_status(ioctx->scmnd);
1497                         goto err;
1498                 }
1499                 break;
1500
1501         case SRP_TSK_MGMT:
1502                 srp_tsk_mgmt_status = srpt_handle_tsk_mgmt(ch, ioctx);
1503                 if (srp_tsk_mgmt_status != SRP_TSK_MGMT_SUCCESS)
1504                         goto err;
1505                 break;
1506
1507         case SRP_I_LOGOUT:
1508         case SRP_AER_REQ:
1509         default:
1510                 goto err;
1511         }
1512
1513         return;
1514
1515 err:
1516         ch_state = atomic_read(&ch->state);
1517         if (ch_state != RDMA_CHANNEL_LIVE) {
1518                 /* Give up if another thread modified the channel state. */
1519                 PRINT_ERROR("%s: channel is in state %d", __func__, ch_state);
1520                 srpt_reset_ioctx(ch, ioctx);
1521         } else {
1522                 if (srp_cmd->opcode == SRP_TSK_MGMT) {
1523                         len = srpt_build_tskmgmt_rsp(ch, ioctx,
1524                                      srp_tsk_mgmt_status,
1525                                      ((struct srp_tsk_mgmt *)srp_cmd)->tag);
1526                 } else if (ioctx->scmnd)
1527                         len = srpt_build_cmd_rsp(ch, ioctx, srp_cmd->tag,
1528                                 srp_response_status,
1529                                 scst_cmd_get_sense_buffer(ioctx->scmnd),
1530                                 scst_cmd_get_sense_buffer_len(ioctx->scmnd));
1531                 else {
1532                         len = srpt_build_cmd_rsp(ch, ioctx, srp_cmd->tag,
1533                                                  srp_response_status,
1534                                                  NULL, 0);
1535                 }
1536                 if (srpt_post_send(ch, ioctx, len)) {
1537                         PRINT_ERROR("%s: sending SRP_RSP response failed",
1538                                     __func__);
1539                         srpt_reset_ioctx(ch, ioctx);
1540                 }
1541         }
1542 }
1543
1544 /*
1545  * Returns true if the ioctx list is non-empty or if the ib_srpt kernel thread
1546  * should stop.
1547  * @pre thread != 0
1548  */
1549 static inline int srpt_test_ioctx_list(void)
1550 {
1551         int res = (!list_empty(&srpt_thread.thread_ioctx_list) ||
1552                    unlikely(kthread_should_stop()));
1553         return res;
1554 }
1555
1556 /*
1557  * Add 'ioctx' to the tail of the ioctx list and wake up the kernel thread.
1558  *
1559  * @pre thread != 0
1560  */
1561 static inline void srpt_schedule_thread(struct srpt_ioctx *ioctx)
1562 {
1563         unsigned long flags;
1564
1565         spin_lock_irqsave(&srpt_thread.thread_lock, flags);
1566         list_add_tail(&ioctx->comp_list, &srpt_thread.thread_ioctx_list);
1567         spin_unlock_irqrestore(&srpt_thread.thread_lock, flags);
1568         wake_up(&ioctx_list_waitQ);
1569 }
1570
1571 /**
1572  * InfiniBand completion queue callback function.
1573  * @cq: completion queue.
1574  * @ctx: completion queue context, which was passed as the fourth argument of
1575  *       the function ib_create_cq().
1576  */
1577 static void srpt_completion(struct ib_cq *cq, void *ctx)
1578 {
1579         struct srpt_rdma_ch *ch = ctx;
1580         struct srpt_device *sdev = ch->sport->sdev;
1581         struct ib_wc wc;
1582         struct srpt_ioctx *ioctx;
1583
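        /*
         * Re-arm the CQ before draining it: any completion that arrives
         * after the final empty ib_poll_cq() call below will then trigger
         * a fresh srpt_completion() callback, so no completion can be
         * missed between draining and re-arming.
         */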
1584         ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
1585         while (ib_poll_cq(ch->cq, 1, &wc) > 0) {
1586                 if (wc.status) {
1587                         PRINT_ERROR("failed %s status= %d",
1588                                wc.wr_id & SRPT_OP_RECV ? "receive" : "send",
1589                                wc.status);
1590                         srpt_handle_err_comp(ch, &wc);
1591                         break;
1592                 }
1593
1594                 if (wc.wr_id & SRPT_OP_RECV) {
1595                         ioctx = sdev->ioctx_ring[wc.wr_id & ~SRPT_OP_RECV];
1596                         if (thread) {
1597                                 ioctx->ch = ch;
1598                                 ioctx->op = IB_WC_RECV;
1599                                 srpt_schedule_thread(ioctx);
1600                         } else
1601                                 srpt_handle_new_iu(ch, ioctx);
1602                         continue;
1603                 } else {
1604                         ioctx = sdev->ioctx_ring[wc.wr_id];
1605                         if (wc.opcode == IB_WC_SEND)
1606                                 atomic_inc(&ch->qp_wr_avail);
1607                         else {
1608                                 WARN_ON(wc.opcode != IB_WC_RDMA_READ);
1609                                 WARN_ON(ioctx->n_rdma <= 0);
1610                                 atomic_add(ioctx->n_rdma,
1611                                            &ch->qp_wr_avail);
1612                         }
1613                 }
1614
1615                 if (thread) {
1616                         ioctx->ch = ch;
1617                         ioctx->op = wc.opcode;
1618                         srpt_schedule_thread(ioctx);
1619                 } else {
1620                         switch (wc.opcode) {
1621                         case IB_WC_SEND:
1622                                 srpt_handle_send_comp(ch, ioctx,
1623                                         scst_estimate_context());
1624                                 break;
1625                         case IB_WC_RDMA_WRITE:
1626                         case IB_WC_RDMA_READ:
1627                                 srpt_handle_rdma_comp(ch, ioctx);
1628                                 break;
1629                         default:
1630                                 PRINT_ERROR("received unrecognized IB WC"
1631                                             " opcode %d", wc.opcode);
1632                                 break;
1633                         }
1634                 }
1635
1636 #if defined(CONFIG_SCST_DEBUG)
1637                 if (interrupt_processing_delay_in_us <= MAX_UDELAY_MS * 1000)
1638                         udelay(interrupt_processing_delay_in_us);
1639 #endif
1640         }
1641 }
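/*
 * Note on the wr_id convention the completion handler above relies on
 * (assuming SRPT_OP_RECV is a high tag bit defined in ib_srpt.h): receive
 * work requests are posted with (index | SRPT_OP_RECV) as wr_id, while send
 * and RDMA work requests use the bare ioctx ring index. Hence
 * wc.wr_id & SRPT_OP_RECV selects the direction and wc.wr_id & ~SRPT_OP_RECV
 * recovers the index into sdev->ioctx_ring[].
 */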
1642
1643 /*
1644  * Create a completion queue on the specified device.
1645  */
1646 static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
1647 {
1648         struct ib_qp_init_attr *qp_init;
1649         struct srpt_device *sdev = ch->sport->sdev;
1650         int cqe;
1651         int ret;
1652
1653         qp_init = kzalloc(sizeof *qp_init, GFP_KERNEL);
1654         if (!qp_init)
1655                 return -ENOMEM;
1656
1657         /* Create a completion queue (CQ). */
1658
1659         cqe = SRPT_RQ_SIZE + SRPT_SQ_SIZE - 1;
1660 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(RHEL_RELEASE_CODE)
1661         ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe);
1662 #else
1663         ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe, 0);
1664 #endif
1665         if (IS_ERR(ch->cq)) {
1666                 ret = PTR_ERR(ch->cq);
1667                 PRINT_ERROR("failed to create_cq cqe= %d ret= %d", cqe, ret);
1668                 goto out;
1669         }
1670
1671         /* Request completion notification. */
1672
1673         ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
1674
1675         /* Create a queue pair (QP). */
1676
1677         qp_init->qp_context = (void *)ch;
1678         qp_init->event_handler
1679                 = (void(*)(struct ib_event *, void*))srpt_qp_event;
1680         qp_init->send_cq = ch->cq;
1681         qp_init->recv_cq = ch->cq;
1682         qp_init->srq = sdev->srq;
1683         qp_init->sq_sig_type = IB_SIGNAL_REQ_WR;
1684         qp_init->qp_type = IB_QPT_RC;
1685         qp_init->cap.max_send_wr = SRPT_SQ_SIZE;
1686         qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE;
1687
1688         ch->qp = ib_create_qp(sdev->pd, qp_init);
1689         if (IS_ERR(ch->qp)) {
1690                 ret = PTR_ERR(ch->qp);
1691                 ib_destroy_cq(ch->cq);
1692                 PRINT_ERROR("failed to create_qp ret= %d", ret);
1693                 goto out;
1694         }
1695
1696         atomic_set(&ch->qp_wr_avail, qp_init->cap.max_send_wr);
1697
1698         TRACE_DBG("%s: max_cqe= %d max_sge= %d cm_id= %p",
1699                __func__, ch->cq->cqe, qp_init->cap.max_send_sge,
1700                ch->cm_id);
1701
1702         /* Modify the attributes and the state of queue pair ch->qp. */
1703
1704         ret = srpt_init_ch_qp(ch, ch->qp);
1705         if (ret) {
1706                 ib_destroy_qp(ch->qp);
1707                 ib_destroy_cq(ch->cq);
1708                 goto out;
1709         }
1710
1711         atomic_set(&ch->req_lim_delta, SRPT_RQ_SIZE);
1712 out:
1713         kfree(qp_init);
1714         return ret;
1715 }
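/*
 * A minimal sketch (not the driver's srpt_init_ch_qp(), which is defined
 * earlier in this file) of the RESET -> INIT transition that precedes the
 * RTR and RTS transitions performed during connection establishment; the
 * access flags and pkey_index used here are illustrative assumptions.
 */
static int srpt_init_qp_sketch(struct srpt_rdma_ch *ch, struct ib_qp *qp)
{
        struct ib_qp_attr qp_attr;

        memset(&qp_attr, 0, sizeof qp_attr);
        qp_attr.qp_state = IB_QPS_INIT;
        qp_attr.qp_access_flags = IB_ACCESS_LOCAL_WRITE |
                                  IB_ACCESS_REMOTE_READ |
                                  IB_ACCESS_REMOTE_WRITE;
        qp_attr.port_num = ch->sport->port;
        qp_attr.pkey_index = 0;

        /* Transition the queue pair from the RESET to the INIT state. */
        return ib_modify_qp(qp, &qp_attr,
                            IB_QP_STATE | IB_QP_ACCESS_FLAGS |
                            IB_QP_PORT | IB_QP_PKEY_INDEX);
}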
1716
1717 /**
1718  * Release the channel corresponding to the specified cm_id.
1719  *
1720  * Note: must be called from inside srpt_cm_handler to avoid a race between
1721  * accessing sdev->spinlock and the call to kfree(sdev) in srpt_remove_one()
1722  * (the caller of srpt_cm_handler holds the cm_id spinlock;
1723  * srpt_remove_one() waits until all SCST sessions for the associated
1724  * IB device have been unregistered and SCST session unregistration involves
1725  * a call to ib_destroy_cm_id(), which locks the cm_id spinlock and hence
1726  * waits until this function has finished).
1727  */
1728 static void srpt_release_channel_by_cmid(struct ib_cm_id *cm_id)
1729 {
1730         struct srpt_device *sdev;
1731         struct srpt_rdma_ch *ch;
1732
1733         sdev = cm_id->context;
1734         BUG_ON(!sdev);
1735         spin_lock_irq(&sdev->spinlock);
1736         list_for_each_entry(ch, &sdev->rch_list, list) {
1737                 if (ch->cm_id == cm_id) {
1738                         list_del(&ch->list);
1739                         atomic_set(&ch->state, RDMA_CHANNEL_DISCONNECTING);
1740                         scst_unregister_session(ch->scst_sess, 0,
1741                                                 srpt_release_channel);
1742                         break;
1743                 }
1744         }
1745         spin_unlock_irq(&sdev->spinlock);
1746 }
1747
1748 /**
1749  * Look up the RDMA channel that corresponds to the specified cm_id.
1750  *
1751  * Return NULL if no matching RDMA channel has been found.
1752  */
1753 static struct srpt_rdma_ch *srpt_find_channel(struct srpt_device *sdev,
1754                                               struct ib_cm_id *cm_id)
1755 {
1756         struct srpt_rdma_ch *ch, *found = NULL;
1757
1758         BUG_ON(!sdev);
1759         spin_lock_irq(&sdev->spinlock);
1760         list_for_each_entry(ch, &sdev->rch_list, list)
1761                 if (ch->cm_id == cm_id) {
1762                         found = ch;
1763                         break;
1764                 }
1765         spin_unlock_irq(&sdev->spinlock);
1766         return found;
1767 }
1768
1769 /**
1770  * Release all resources associated with an RDMA channel.
1771  *
1772  * Notes:
1773  * - The caller must have removed the channel from the channel list before
1774  *   calling this function.
1775  * - Must be called as a callback function via scst_unregister_session(). Never
1776  *   call this function directly because doing so would trigger several race
1777  *   conditions.
1778  */
1779 static void srpt_release_channel(struct scst_session *scst_sess)
1780 {
1781         struct srpt_rdma_ch *ch;
1782
1783         TRACE_ENTRY();
1784
1785         ch = scst_sess_get_tgt_priv(scst_sess);
1786         BUG_ON(!ch);
1787         WARN_ON(srpt_find_channel(ch->sport->sdev, ch->cm_id) == ch);
1788
1789         WARN_ON(atomic_read(&ch->state) != RDMA_CHANNEL_DISCONNECTING);
1790
1791         TRACE_DBG("destroying cm_id %p", ch->cm_id);
1792         BUG_ON(!ch->cm_id);
1793         ib_destroy_cm_id(ch->cm_id);
1794
1795         ib_destroy_qp(ch->qp);
1796         ib_destroy_cq(ch->cq);
1797         kfree(ch);
1798
1799         TRACE_EXIT();
1800 }
1801
1802 /**
1803  * Process the event IB_CM_REQ_RECEIVED.
1804  *
1805  * Ownership of the cm_id is transferred to the SCST session if this function
1806  * returns zero. Otherwise the caller remains the owner of cm_id.
1807  */
1808 static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
1809                             struct ib_cm_req_event_param *param,
1810                             void *private_data)
1811 {
1812         struct srpt_device *sdev = cm_id->context;
1813         struct srp_login_req *req;
1814         struct srp_login_rsp *rsp;
1815         struct srp_login_rej *rej;
1816         struct ib_cm_rep_param *rep_param;
1817         struct srpt_rdma_ch *ch, *tmp_ch;
1818         u32 it_iu_len;
1819         int ret = 0;
1820
1821 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
1822         WARN_ON(!sdev || !private_data);
1823         if (!sdev || !private_data)
1824                 return -EINVAL;
1825 #else
1826         if (WARN_ON(!sdev || !private_data))
1827                 return -EINVAL;
1828 #endif
1829
1830         req = (struct srp_login_req *)private_data;
1831
1832         it_iu_len = be32_to_cpu(req->req_it_iu_len);
1833
1834         PRINT_INFO("Received SRP_LOGIN_REQ with"
1835             " i_port_id 0x%llx:0x%llx, t_port_id 0x%llx:0x%llx and it_iu_len %d"
1836             " on port %d (guid=0x%llx:0x%llx)",
1837             (unsigned long long)be64_to_cpu(*(u64 *)&req->initiator_port_id[0]),
1838             (unsigned long long)be64_to_cpu(*(u64 *)&req->initiator_port_id[8]),
1839             (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[0]),
1840             (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[8]),
1841             it_iu_len,
1842             param->port,
1843             (unsigned long long)be64_to_cpu(*(u64 *)
1844                                 &sdev->port[param->port - 1].gid.raw[0]),
1845             (unsigned long long)be64_to_cpu(*(u64 *)
1846                                 &sdev->port[param->port - 1].gid.raw[8]));
1847
1848         rsp = kzalloc(sizeof *rsp, GFP_KERNEL);
1849         rej = kzalloc(sizeof *rej, GFP_KERNEL);
1850         rep_param = kzalloc(sizeof *rep_param, GFP_KERNEL);
1851
1852         if (!rsp || !rej || !rep_param) {
1853                 ret = -ENOMEM;
1854                 goto out;
1855         }
1856
1857         if (it_iu_len > srp_max_message_size || it_iu_len < 64) {
1858                 rej->reason =
1859                     cpu_to_be32(SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
1860                 ret = -EINVAL;
1861                 PRINT_ERROR("rejected SRP_LOGIN_REQ because its"
1862                             " length (%d bytes) is out of range (%d .. %d)",
1863                             it_iu_len, 64, srp_max_message_size);
1864                 goto reject;
1865         }
1866
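        /*
         * SRP_MULTICHAN_SINGLE in the login request asks the target to
         * terminate any existing channel between the same initiator and
         * target ports before establishing the new one; hence the
         * search-and-disconnect pass below.
         */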
1867         if ((req->req_flags & 0x3) == SRP_MULTICHAN_SINGLE) {
1868                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;
1869
1870                 spin_lock_irq(&sdev->spinlock);
1871
1872                 list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
1873                         if (!memcmp(ch->i_port_id, req->initiator_port_id, 16)
1874                             && !memcmp(ch->t_port_id, req->target_port_id, 16)
1875                             && param->port == ch->sport->port
1876                             && param->listen_id == ch->sport->sdev->cm_id
1877                             && ch->cm_id) {
1878                                 enum rdma_ch_state prev_state;
1879
1880                                 /* found an existing channel */
1881                                 TRACE_DBG("Found existing channel name= %s"
1882                                           " cm_id= %p state= %d",
1883                                           ch->sess_name, ch->cm_id,
1884                                           atomic_read(&ch->state));
1885
1886                                 prev_state = atomic_xchg(&ch->state,
1887                                                 RDMA_CHANNEL_DISCONNECTING);
1888                                 if (prev_state == RDMA_CHANNEL_CONNECTING)
1889                                         list_del(&ch->list);
1890
1891                                 spin_unlock_irq(&sdev->spinlock);
1892
1893                                 rsp->rsp_flags =
1894                                         SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
1895
1896                                 if (prev_state == RDMA_CHANNEL_LIVE) {
1897                                         ib_send_cm_dreq(ch->cm_id, NULL, 0);
1898                                         PRINT_INFO("disconnected"
1899                                           " session %s because a new"
1900                                           " SRP_LOGIN_REQ has been received.",
1901                                           ch->sess_name);
1902                                 } else if (prev_state ==
1903                                          RDMA_CHANNEL_CONNECTING) {
1904                                         PRINT_ERROR("%s", "rejected"
1905                                           " SRP_LOGIN_REQ because another login"
1906                                           " request is being processed.");
1907                                         ib_send_cm_rej(ch->cm_id,
1908                                                        IB_CM_REJ_NO_RESOURCES,
1909                                                        NULL, 0, NULL, 0);
1910                                         scst_unregister_session(ch->scst_sess,
1911                                                         0,
1912                                                         srpt_release_channel);
1913                                 }
1914
1915                                 spin_lock_irq(&sdev->spinlock);
1916                         }
1917                 }
1918
1919                 spin_unlock_irq(&sdev->spinlock);
1920
1921         } else
1922                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
1923
1924         if (((u64) (*(u64 *) req->target_port_id) !=
1925              cpu_to_be64(srpt_service_guid)) ||
1926             ((u64) (*(u64 *) (req->target_port_id + 8)) !=
1927              cpu_to_be64(srpt_service_guid))) {
1928                 rej->reason =
1929                     cpu_to_be32(SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
1930                 ret = -ENOMEM;
1931                 PRINT_ERROR("%s", "rejected SRP_LOGIN_REQ because it"
1932                        " has an invalid target port identifier.");
1933                 goto reject;
1934         }
1935
1936         ch = kzalloc(sizeof *ch, GFP_KERNEL);
1937         if (!ch) {
1938                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1939                 PRINT_ERROR("%s",
1940                             "rejected SRP_LOGIN_REQ because out of memory.");
1941                 ret = -ENOMEM;
1942                 goto reject;
1943         }
1944
1945         memcpy(ch->i_port_id, req->initiator_port_id, 16);
1946         memcpy(ch->t_port_id, req->target_port_id, 16);
1947         ch->sport = &sdev->port[param->port - 1];
1948         ch->cm_id = cm_id;
1949         atomic_set(&ch->state, RDMA_CHANNEL_CONNECTING);
1950         INIT_LIST_HEAD(&ch->cmd_wait_list);
1951
1952         ret = srpt_create_ch_ib(ch);
1953         if (ret) {
1954                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1955                 PRINT_ERROR("%s", "rejected SRP_LOGIN_REQ because creating"
1956                             " a new RDMA channel failed.");
1957                 goto free_ch;
1958         }
1959
1960         ret = srpt_ch_qp_rtr(ch, ch->qp);
1961         if (ret) {
1962                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1963                 PRINT_ERROR("rejected SRP_LOGIN_REQ because enabling"
1964                        " RTR failed (error code = %d)", ret);
1965                 goto destroy_ib;
1966         }
1967
1968         if (use_port_guid_in_session_name) {
1969                 /*
1970                  * If the kernel module parameter use_port_guid_in_session_name
1971                  * has been specified, use a combination of the target port
1972                  * GUID and the initiator port ID as the session name. This
1973                  * was the original behavior of the SRP target implementation
1974                  * (i.e. before the SRPT was included in OFED 1.3).
1975                  */
1976                 snprintf(ch->sess_name, sizeof(ch->sess_name),
1977                          "0x%016llx%016llx",
1978                          (unsigned long long)be64_to_cpu(*(u64 *)
1979                                 &sdev->port[param->port - 1].gid.raw[8]),
1980                          (unsigned long long)be64_to_cpu(*(u64 *)
1981                                 (ch->i_port_id + 8)));
1982         } else {
1983                 /*
1984          * Default behavior: use the initiator port identifier as the
1985                  * session name.
1986                  */
1987                 snprintf(ch->sess_name, sizeof(ch->sess_name),
1988                          "0x%016llx%016llx",
1989                          (unsigned long long)be64_to_cpu(*(u64 *)ch->i_port_id),
1990                          (unsigned long long)be64_to_cpu(*(u64 *)
1991                                  (ch->i_port_id + 8)));
1992         }
1993
1994         TRACE_DBG("registering session %s", ch->sess_name);
1995
1996         BUG_ON(!sdev->scst_tgt);
1997         ch->scst_sess = scst_register_session(sdev->scst_tgt, 0, ch->sess_name,
1998                                               NULL, NULL);
1999         if (!ch->scst_sess) {
2000                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
2001                 TRACE_DBG("%s", "Failed to create scst sess");
2002                 goto destroy_ib;
2003         }
2004
2005         TRACE_DBG("Establish connection sess=%p name=%s cm_id=%p",
2006                   ch->scst_sess, ch->sess_name, ch->cm_id);
2007
2008         scst_sess_set_tgt_priv(ch->scst_sess, ch);
2009
2010         /* create srp_login_response */
2011         rsp->opcode = SRP_LOGIN_RSP;
2012         rsp->tag = req->tag;
2013         rsp->max_it_iu_len = req->req_it_iu_len;
2014         rsp->max_ti_iu_len = req->req_it_iu_len;
2015         ch->max_ti_iu_len = req->req_it_iu_len;
2016         rsp->buf_fmt =
2017             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
2018         rsp->req_lim_delta = cpu_to_be32(SRPT_RQ_SIZE);
2019         atomic_set(&ch->req_lim_delta, 0);
2020
2021         /* create cm reply */
2022         rep_param->qp_num = ch->qp->qp_num;
2023         rep_param->private_data = (void *)rsp;
2024         rep_param->private_data_len = sizeof *rsp;
2025         rep_param->rnr_retry_count = 7;
2026         rep_param->flow_control = 1;
2027         rep_param->failover_accepted = 0;
2028         rep_param->srq = 1;
2029         rep_param->responder_resources = 4;
2030         rep_param->initiator_depth = 4;
2031
2032         ret = ib_send_cm_rep(cm_id, rep_param);
2033         if (ret) {
2034                 PRINT_ERROR("sending SRP_LOGIN_REQ response failed"
2035                             " (error code = %d)", ret);
2036                 goto release_channel;
2037         }
2038
2039         spin_lock_irq(&sdev->spinlock);
2040         list_add_tail(&ch->list, &sdev->rch_list);
2041         spin_unlock_irq(&sdev->spinlock);
2042
2043         goto out;
2044
2045 release_channel:
2046         atomic_set(&ch->state, RDMA_CHANNEL_DISCONNECTING);
2047         scst_unregister_session(ch->scst_sess, 0, NULL);
2048         ch->scst_sess = NULL;
2049
2050 destroy_ib:
2051         ib_destroy_qp(ch->qp);
2052         ib_destroy_cq(ch->cq);
2053
2054 free_ch:
2055         kfree(ch);
2056
2057 reject:
2058         rej->opcode = SRP_LOGIN_REJ;
2059         rej->tag = req->tag;
2060         rej->buf_fmt =
2061             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
2062
2063         ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
2064                              (void *)rej, sizeof *rej);
2065
2066 out:
2067         kfree(rep_param);
2068         kfree(rsp);
2069         kfree(rej);
2070
2071         return ret;
2072 }
2073
2074 static void srpt_cm_rej_recv(struct ib_cm_id *cm_id)
2075 {
2076         PRINT_INFO("Received InfiniBand REJ packet for cm_id %p.", cm_id);
2077         srpt_release_channel_by_cmid(cm_id);
2078 }
2079
2080 /**
2081  * Process an IB_CM_RTU_RECEIVED or IB_CM_USER_ESTABLISHED event.
2082  *
2083  * An IB_CM_RTU_RECEIVED message indicates that the connection is established
2084  * and that the recipient may begin transmitting (RTU = ready to use).
2085  */
2086 static void srpt_cm_rtu_recv(struct ib_cm_id *cm_id)
2087 {
2088         struct srpt_rdma_ch *ch;
2089         int ret;
2090
2091         ch = srpt_find_channel(cm_id->context, cm_id);
2092         WARN_ON(!ch);
2093         if (!ch)
2094                 goto out;
2095
2096         if (srpt_test_and_set_channel_state(ch, RDMA_CHANNEL_CONNECTING,
2097                         RDMA_CHANNEL_LIVE) == RDMA_CHANNEL_CONNECTING) {
2098                 struct srpt_ioctx *ioctx, *ioctx_tmp;
2099
2100                 ret = srpt_ch_qp_rts(ch, ch->qp);
2101
2102                 list_for_each_entry_safe(ioctx, ioctx_tmp, &ch->cmd_wait_list,
2103                                          wait_list) {
2104                         list_del(&ioctx->wait_list);
2105                         srpt_handle_new_iu(ch, ioctx);
2106                 }
2107                 if (ret && srpt_test_and_set_channel_state(ch,
2108                         RDMA_CHANNEL_LIVE,
2109                         RDMA_CHANNEL_DISCONNECTING) == RDMA_CHANNEL_LIVE) {
2110                         TRACE_DBG("cm_id=%p sess_name=%s state=%d",
2111                                   cm_id, ch->sess_name,
2112                                   atomic_read(&ch->state));
2113                         ib_send_cm_dreq(ch->cm_id, NULL, 0);
2114                 }
2115         }
2116
2117 out:
2118         ;
2119 }
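/*
 * Sketch of srpt_test_and_set_channel_state(), under the assumption that it
 * is a thin atomic_cmpxchg() wrapper as its use above suggests: the channel
 * state is set to 'new' only if it currently equals 'old', and the state
 * observed before the exchange is returned so that the caller can tell
 * whether the transition actually happened.
 */
static enum rdma_ch_state
srpt_test_and_set_channel_state_sketch(struct srpt_rdma_ch *ch,
                                       enum rdma_ch_state old,
                                       enum rdma_ch_state new)
{
        return atomic_cmpxchg(&ch->state, old, new);
}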
2120
2121 static void srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
2122 {
2123         PRINT_INFO("Received InfiniBand TimeWait exit for cm_id %p.", cm_id);
2124         srpt_release_channel_by_cmid(cm_id);
2125 }
2126
2127 static void srpt_cm_rep_error(struct ib_cm_id *cm_id)
2128 {
2129         PRINT_INFO("Received InfiniBand REP error for cm_id %p.", cm_id);
2130         srpt_release_channel_by_cmid(cm_id);
2131 }
2132
2133 static void srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
2134 {
2135         struct srpt_rdma_ch *ch;
2136
2137         ch = srpt_find_channel(cm_id->context, cm_id);
2138         WARN_ON(!ch);
2139         if (!ch)
2140                 goto out;
2141
2142         TRACE_DBG("cm_id= %p ch->state= %d", cm_id, atomic_read(&ch->state));
2143
2144         switch (atomic_read(&ch->state)) {
2145         case RDMA_CHANNEL_LIVE:
2146         case RDMA_CHANNEL_CONNECTING:
2147                 ib_send_cm_drep(ch->cm_id, NULL, 0);
2148                 PRINT_INFO("Received DREQ and sent DREP for session %s.",
2149                            ch->sess_name);
2150                 break;
2151         case RDMA_CHANNEL_DISCONNECTING:
2152         default:
2153                 break;
2154         }
2155
2156 out:
2157         ;
2158 }
2159
2160 static void srpt_cm_drep_recv(struct ib_cm_id *cm_id)
2161 {
2162         PRINT_INFO("Received InfiniBand DREP message for cm_id %p.", cm_id);
2163         srpt_release_channel_by_cmid(cm_id);
2164 }
2165
2166 /**
2167  * IB connection manager callback function.
2168  *
2169  * A non-zero return value will cause the caller to destroy the CM ID.
2170  *
2171  * Note: srpt_cm_handler() must only return a non-zero value when the transfer
2172  * of cm_id ownership to a channel by srpt_cm_req_recv() has failed. Returning
2173  * a non-zero value in any other case will trigger a race with the
2174  * ib_destroy_cm_id() call in srpt_release_channel().
2175  */
2176 static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
2177 {
2178         int ret;
2179
2180         ret = 0;
2181         switch (event->event) {
2182         case IB_CM_REQ_RECEIVED:
2183                 ret = srpt_cm_req_recv(cm_id, &event->param.req_rcvd,
2184                                        event->private_data);
2185                 break;
2186         case IB_CM_REJ_RECEIVED:
2187                 srpt_cm_rej_recv(cm_id);
2188                 break;
2189         case IB_CM_RTU_RECEIVED:
2190         case IB_CM_USER_ESTABLISHED:
2191                 srpt_cm_rtu_recv(cm_id);
2192                 break;
2193         case IB_CM_DREQ_RECEIVED:
2194                 srpt_cm_dreq_recv(cm_id);
2195                 break;
2196         case IB_CM_DREP_RECEIVED:
2197                 srpt_cm_drep_recv(cm_id);
2198                 break;
2199         case IB_CM_TIMEWAIT_EXIT:
2200                 srpt_cm_timewait_exit(cm_id);
2201                 break;
2202         case IB_CM_REP_ERROR:
2203                 srpt_cm_rep_error(cm_id);
2204                 break;
2205         default:
2206                 PRINT_ERROR("received unrecognized IB CM event %d",
2207                             event->event);
2208                 break;
2209         }
2210
2211         return ret;
2212 }
2213
2214 static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
2215                                  struct srpt_ioctx *ioctx,
2216                                  struct scst_cmd *scmnd)
2217 {
2218         struct scatterlist *scat;
2219         scst_data_direction dir;
2220         struct rdma_iu *riu;
2221         struct srp_direct_buf *db;
2222         dma_addr_t dma_addr;
2223         struct ib_sge *sge;
2224         u64 raddr;
2225         u32 rsize;
2226         u32 tsize;
2227         u32 dma_len;
2228         int count, nrdma;
2229         int i, j, k;
2230
2231         scat = scst_cmd_get_sg(scmnd);
2232         dir = scst_cmd_get_data_direction(scmnd);
2233         WARN_ON(scat == NULL);
2234         count = ib_dma_map_sg(ch->sport->sdev->device, scat,
2235                               scst_cmd_get_sg_cnt(scmnd),
2236                               scst_to_tgt_dma_dir(dir));
2237         if (unlikely(!count))
2238                 return -EBUSY;
2239
2240         if (ioctx->rdma_ius && ioctx->n_rdma_ius)
2241                 nrdma = ioctx->n_rdma_ius;
2242         else {
2243                 nrdma = count / SRPT_DEF_SG_PER_WQE + ioctx->n_rbuf;
2244
2245                 ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu,
2246                                           scst_cmd_atomic(scmnd)
2247                                           ? GFP_ATOMIC : GFP_KERNEL);
2248                 if (!ioctx->rdma_ius) {
2249                         WARN_ON(scat == NULL);
2250                         ib_dma_unmap_sg(ch->sport->sdev->device,
2251                                         scat, scst_cmd_get_sg_cnt(scmnd),
2252                                         scst_to_tgt_dma_dir(dir));
2253                         return -ENOMEM;
2254                 }
2255
2256                 ioctx->n_rdma_ius = nrdma;
2257         }
2258
2259         db = ioctx->rbufs;
2260         tsize = (dir == SCST_DATA_READ) ?
2261                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
2262         dma_len = sg_dma_len(&scat[0]);
2263         riu = ioctx->rdma_ius;
2264
2265         /*
2266          * For each remote descriptor, count the ib_sge entries needed.
2267          * If at most SRPT_DEF_SG_PER_WQE ib_sge entries are needed per
2268          * RDMA operation, a single rdma_iu (one RDMA work request) per
2269          * remote descriptor suffices; otherwise extra rdma_iu entries are
2270          * allocated to carry the remaining ib_sge entries in additional
2271          * RDMA work requests.
2272          */
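        /*
         * Worked example (numbers are illustrative): with
         * SRPT_DEF_SG_PER_WQE == 16, a remote descriptor spanning 20 mapped
         * SG entries is split over two rdma_iu's; the first carries 16
         * ib_sge entries and the second the remaining 4, each posted later
         * as a separate RDMA work request.
         */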
2273         for (i = 0, j = 0;
2274              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
2275                 rsize = be32_to_cpu(db->len);
2276                 raddr = be64_to_cpu(db->va);
2277                 riu->raddr = raddr;
2278                 riu->rkey = be32_to_cpu(db->key);
2279                 riu->sge_cnt = 0;
2280
2281                 /* Calculate how many SGEs this remote buffer requires. */
2282                 while (rsize > 0 && tsize > 0) {
2283
2284                         if (rsize >= dma_len) {
2285                                 tsize -= dma_len;
2286                                 rsize -= dma_len;
2287                                 raddr += dma_len;
2288
2289                                 if (tsize > 0) {
2290                                         ++j;
2291                                         if (j < count)
2292                                                 dma_len = sg_dma_len(&scat[j]);
2293                                 }
2294                         } else {
2295                                 tsize -= rsize;
2296                                 dma_len -= rsize;
2297                                 rsize = 0;
2298                         }
2299
2300                         ++riu->sge_cnt;
2301
2302                         if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) {
2303                                 ++ioctx->n_rdma;
2304                                 riu->sge =
2305                                     kmalloc(riu->sge_cnt * sizeof *riu->sge,
2306                                             scst_cmd_atomic(scmnd)
2307                                             ? GFP_ATOMIC : GFP_KERNEL);
2308                                 if (!riu->sge)
2309                                         goto free_mem;
2310
2311                                 ++riu;
2312                                 riu->sge_cnt = 0;
2313                                 riu->raddr = raddr;
2314                                 riu->rkey = be32_to_cpu(db->key);
2315                         }
2316                 }
2317
2318                 ++ioctx->n_rdma;
2319                 riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge,
2320                                    scst_cmd_atomic(scmnd)
2321                                    ? GFP_ATOMIC : GFP_KERNEL);
2322                 if (!riu->sge)
2323                         goto free_mem;
2324         }
2325
2326         db = ioctx->rbufs;
2327         scat = scst_cmd_get_sg(scmnd);
2328         tsize = (dir == SCST_DATA_READ) ?
2329                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
2330         riu = ioctx->rdma_ius;
2331         dma_len = sg_dma_len(&scat[0]);
2332         dma_addr = sg_dma_address(&scat[0]);
2333
2334         /* The second pass maps the DMA-mapped SG addresses onto rdma_iu->sge. */
2335         for (i = 0, j = 0;
2336              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
2337                 rsize = be32_to_cpu(db->len);
2338                 sge = riu->sge;
2339                 k = 0;
2340
2341                 while (rsize > 0 && tsize > 0) {
2342                         sge->addr = dma_addr;
2343                         sge->lkey = ch->sport->sdev->mr->lkey;
2344
2345                         if (rsize >= dma_len) {
2346                                 sge->length =
2347                                         (tsize < dma_len) ? tsize : dma_len;
2348                                 tsize -= dma_len;
2349                                 rsize -= dma_len;
2350
2351                                 if (tsize > 0) {
2352                                         ++j;
2353                                         if (j < count) {
2354                                                 dma_len = sg_dma_len(&scat[j]);
2355                                                 dma_addr =
2356                                                     sg_dma_address(&scat[j]);
2357                                         }
2358                                 }
2359                         } else {
2360                                 sge->length = (tsize < rsize) ? tsize : rsize;
2361                                 tsize -= rsize;
2362                                 dma_len -= rsize;
2363                                 dma_addr += rsize;
2364                                 rsize = 0;
2365                         }
2366
2367                         ++k;
2368                         if (k == riu->sge_cnt && rsize > 0) {
2369                                 ++riu;
2370                                 sge = riu->sge;
2371                                 k = 0;
2372                         } else if (rsize > 0)
2373                                 ++sge;
2374                 }
2375         }
2376
2377         return 0;
2378
2379 free_mem:
2380         srpt_unmap_sg_to_ib_sge(ch, ioctx);
2381
2382         return -ENOMEM;
2383 }
2384
2385 static void srpt_unmap_sg_to_ib_sge(struct srpt_rdma_ch *ch,
2386                                     struct srpt_ioctx *ioctx)
2387 {
2388         struct scst_cmd *scmnd;
2389         struct scatterlist *scat;
2390         scst_data_direction dir;
2391
2392         BUG_ON(ioctx->n_rdma && !ioctx->rdma_ius);
2393
2394         while (ioctx->n_rdma)
2395                 kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge);
2396
2397         kfree(ioctx->rdma_ius);
2398         ioctx->rdma_ius = NULL;
2399
2400         scmnd = ioctx->scmnd;
2401         if (scmnd) {
2402                 BUG_ON(ioctx != scst_cmd_get_tgt_priv(scmnd));
2403                 scat = scst_cmd_get_sg(scmnd);
2404                 if (scat) {
2405                         dir = scst_cmd_get_data_direction(scmnd);
2406                         ib_dma_unmap_sg(ch->sport->sdev->device,
2407                                         scat, scst_cmd_get_sg_cnt(scmnd),
2408                                         scst_to_tgt_dma_dir(dir));
2409                 }
2410         }
2411 }
2412
2413 static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
2414                               scst_data_direction dir)
2415 {
2416         struct ib_send_wr wr;
2417         struct ib_send_wr *bad_wr;
2418         struct rdma_iu *riu;
2419         int i;
2420         int ret;
2421         int sq_wr_avail;
2422
2423         if (dir == SCST_DATA_WRITE) {
2424                 ret = -ENOMEM;
2425                 sq_wr_avail = atomic_sub_return(ioctx->n_rdma,
2426                                                 &ch->qp_wr_avail);
2427                 if (sq_wr_avail < 0) {
2428                         atomic_add(ioctx->n_rdma, &ch->qp_wr_avail);
2429                         PRINT_INFO("%s[%d]: send queue full", __func__, __LINE__);
2430                         goto out;
2431                 }
2432         }
2433
2434         ret = 0;
2435         riu = ioctx->rdma_ius;
2436         memset(&wr, 0, sizeof wr);
2437
2438         for (i = 0; i < ioctx->n_rdma; ++i, ++riu) {
2439                 wr.opcode = (dir == SCST_DATA_READ) ?
2440                     IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
2441                 wr.next = NULL;
2442                 wr.wr_id = ioctx->index;
2443                 wr.wr.rdma.remote_addr = riu->raddr;
2444                 wr.wr.rdma.rkey = riu->rkey;
2445                 wr.num_sge = riu->sge_cnt;
2446                 wr.sg_list = riu->sge;
2447
2448                 /* Request a completion event only for the last RDMA WR. */
2449                 if (i == (ioctx->n_rdma - 1) && dir == SCST_DATA_WRITE)
2450                         wr.send_flags = IB_SEND_SIGNALED;
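                /*
                 * Because sq_sig_type == IB_SIGNAL_REQ_WR, only work
                 * requests flagged IB_SEND_SIGNALED generate a completion;
                 * the single completion for the final RDMA read thus stands
                 * for the whole chain, and srpt_completion() returns all
                 * ioctx->n_rdma send queue credits with one atomic_add().
                 */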
2451
2452                 ret = ib_post_send(ch->qp, &wr, &bad_wr);
2453                 if (ret)
2454                         goto out;
2455         }
2456
2457 out:
2458         return ret;
2459 }
2460
2461 /*
2462  * Start data transfer between initiator and target. Must not block.
2463  */
2464 static int srpt_xfer_data(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
2465                           struct scst_cmd *scmnd)
2466 {
2467         int ret;
2468
2469         ret = srpt_map_sg_to_ib_sge(ch, ioctx, scmnd);
2470         if (ret) {
2471                 PRINT_ERROR("%s[%d] ret=%d", __func__, __LINE__, ret);
2472                 ret = SCST_TGT_RES_QUEUE_FULL;
2473                 goto out;
2474         }
2475
2476         ret = srpt_perform_rdmas(ch, ioctx, scst_cmd_get_data_direction(scmnd));
2477         if (ret) {
2478                 if (ret == -EAGAIN || ret == -ENOMEM) {
2479                         PRINT_INFO("%s[%d] queue full -- ret=%d",
2480                                    __func__, __LINE__, ret);
2481                         ret = SCST_TGT_RES_QUEUE_FULL;
2482                 } else {
2483                         PRINT_ERROR("%s[%d] fatal error -- ret=%d",
2484                                     __func__, __LINE__, ret);
2485                         ret = SCST_TGT_RES_FATAL_ERROR;
2486                 }
2487                 goto out_unmap;
2488         }
2489
2490         ret = SCST_TGT_RES_SUCCESS;
2491
2492 out:
2493         return ret;
2494 out_unmap:
2495         srpt_unmap_sg_to_ib_sge(ch, ioctx);
2496         goto out;
2497 }
2498
2499 /*
2500  * Called by the SCST core to inform ib_srpt that data reception from the
2501  * initiator should start (SCST_DATA_WRITE). Must not block.
2502  */
2503 static int srpt_rdy_to_xfer(struct scst_cmd *scmnd)
2504 {
2505         struct srpt_rdma_ch *ch;
2506         struct srpt_ioctx *ioctx;
2507         enum rdma_ch_state ch_state;
2508         int ret;
2509
2510         ioctx = scst_cmd_get_tgt_priv(scmnd);
2511         BUG_ON(!ioctx);
2512
2513         WARN_ON(srpt_set_cmd_state(ioctx, SRPT_STATE_NEED_DATA)
2514                 == SRPT_STATE_ABORTED);
2515
2516         ch = ioctx->ch;
2517         WARN_ON(ch != scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd)));
2518         BUG_ON(!ch);
2519
2520         ch_state = atomic_read(&ch->state);
2521         if (ch_state == RDMA_CHANNEL_DISCONNECTING) {
2522                 TRACE_DBG("cmd with tag %lld: channel disconnecting",
2523                           scst_cmd_get_tag(scmnd));
2524                 ret = SCST_TGT_RES_FATAL_ERROR;
2525                 goto out;
2526         } else if (ch_state == RDMA_CHANNEL_CONNECTING) {
2527                 ret = SCST_TGT_RES_QUEUE_FULL;
2528                 goto out;
2529         }
2530         ret = srpt_xfer_data(ch, ioctx, scmnd);
2531
2532 out:
2533         return ret;
2534 }
2535
2536 /**
2537  * srpt_xmit_response() - SCST callback function that transmits the response
2538  * to a SCSI command.
2539  *
2540  * Must not block.
2541  */
2542 static int srpt_xmit_response(struct scst_cmd *scmnd)
2543 {
2544         struct srpt_rdma_ch *ch;
2545         struct srpt_ioctx *ioctx;
2546         int ret = SCST_TGT_RES_SUCCESS;
2547         int dir;
2548         int resp_len;
2549
2550         ioctx = scst_cmd_get_tgt_priv(scmnd);
2551         BUG_ON(!ioctx);
2552
2553         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2554         BUG_ON(!ch);
2555
2556         if (unlikely(scst_cmd_aborted(scmnd))) {
2557                 TRACE_DBG("cmd with tag %lld has been aborted",
2558                           scst_cmd_get_tag(scmnd));
2559                 srpt_abort_scst_cmd(ch->sport->sdev, scmnd);
2560                 ret = SCST_TGT_RES_SUCCESS;
2561                 goto out;
2562         }
2563
2564         if (srpt_set_cmd_state(ioctx, SRPT_STATE_PROCESSED)
2565             == SRPT_STATE_ABORTED) {
2566                 ret = SCST_TGT_RES_SUCCESS;
2567                 goto out;
2568         }
2569
2570         dir = scst_cmd_get_data_direction(scmnd);
2571
2572         /* For read commands, transfer the data to the initiator. */
2573         if (dir == SCST_DATA_READ && scst_cmd_get_resp_data_len(scmnd)) {
2574                 ret = srpt_xfer_data(ch, ioctx, scmnd);
2575                 if (ret != SCST_TGT_RES_SUCCESS) {
2576                         PRINT_ERROR("%s: tag= %lld xfer_data failed",
2577                                     __func__,
2578                                     (unsigned long long)
2579                                     scst_cmd_get_tag(scmnd));
2580                         goto out;
2581                 }
2582         }
2583
2584         scst_check_convert_sense(scmnd);
2585
2586         resp_len = srpt_build_cmd_rsp(ch, ioctx,
2587                                       scst_cmd_get_tag(scmnd),
2588                                       scst_cmd_get_status(scmnd),
2589                                       scst_cmd_get_sense_buffer(scmnd),
2590                                       scst_cmd_get_sense_buffer_len(scmnd));
2591
2592         if (srpt_post_send(ch, ioctx, resp_len)) {
2593                 PRINT_ERROR("%s[%d]: ch->state= %d tag= %lld",
2594                             __func__, __LINE__, atomic_read(&ch->state),
2595                             (unsigned long long)scst_cmd_get_tag(scmnd));
2596                 ret = SCST_TGT_RES_FATAL_ERROR;
2597         }
2598
2599 out:
2600         return ret;
2601 }
2602
2603 /**
2604  * srpt_tsk_mgmt_done() - SCST callback function that sends back the response
2605  * for a task management request.
2606  *
2607  * Must not block.
2608  */
2609 static void srpt_tsk_mgmt_done(struct scst_mgmt_cmd *mcmnd)
2610 {
2611         struct srpt_rdma_ch *ch;
2612         struct srpt_mgmt_ioctx *mgmt_ioctx;
2613         struct srpt_ioctx *ioctx;
2614         int rsp_len;
2615
2616         mgmt_ioctx = scst_mgmt_cmd_get_tgt_priv(mcmnd);
2617         BUG_ON(!mgmt_ioctx);
2618
2619         ch = mgmt_ioctx->ch;
2620         BUG_ON(!ch);
2621
2622         ioctx = mgmt_ioctx->ioctx;
2623         BUG_ON(!ioctx);
2624
2625         TRACE_DBG("%s: tsk_mgmt_done for tag= %lld status=%d",
2626                   __func__, (unsigned long long)mgmt_ioctx->tag,
2627                   scst_mgmt_cmd_get_status(mcmnd));
2628
2629         if (srpt_set_cmd_state(ioctx, SRPT_STATE_PROCESSED)
2630             == SRPT_STATE_ABORTED)
2631                 goto out;
2632
2633         rsp_len = srpt_build_tskmgmt_rsp(ch, ioctx,
2634                                          (scst_mgmt_cmd_get_status(mcmnd) ==
2635                                           SCST_MGMT_STATUS_SUCCESS) ?
2636                                          SRP_TSK_MGMT_SUCCESS :
2637                                          SRP_TSK_MGMT_FAILED,
2638                                          mgmt_ioctx->tag);
2639         srpt_post_send(ch, ioctx, rsp_len);
2640
2641         scst_mgmt_cmd_set_tgt_priv(mcmnd, NULL);
2642
2643         kfree(mgmt_ioctx);
2644
2645 out:
2646         ;
2647 }
2648
2649 /*
2650  * Called by the SCST core to inform ib_srpt that the command 'scmnd' is about
2651  * to be freed. May be called in IRQ context.
2652  */
2653 static void srpt_on_free_cmd(struct scst_cmd *scmnd)
2654 {
2655         struct srpt_rdma_ch *ch;
2656         struct srpt_ioctx *ioctx;
2657
2658         ioctx = scst_cmd_get_tgt_priv(scmnd);
2659         BUG_ON(!ioctx);
2660
2661         ch = ioctx->ch;
2662         BUG_ON(!ch);
2663
2664         scst_cmd_set_tgt_priv(scmnd, NULL);
2665         srpt_set_cmd_state(ioctx, SRPT_STATE_ABORTED);
2666         ioctx->scmnd = NULL;
2667         ioctx->ch = NULL;
2668         srpt_reset_ioctx(ch, ioctx);
2669 }
2670
2671 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2672 /* A vanilla 2.6.19 or older kernel without backported OFED kernel headers. */
2673 static void srpt_refresh_port_work(void *ctx)
2674 #else
2675 static void srpt_refresh_port_work(struct work_struct *work)
2676 #endif
2677 {
2678 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2679         struct srpt_port *sport = (struct srpt_port *)ctx;
2680 #else
2681         struct srpt_port *sport = container_of(work, struct srpt_port, work);
2682 #endif
2683
2684         srpt_refresh_port(sport);
2685 }
2686
2687 /*
2688  * Called by the SCST core to detect target adapters. Returns the number of
2689  * detected target adapters.
2690  */
2691 static int srpt_detect(struct scst_tgt_template *tp)
2692 {
2693         int device_count;
2694
2695         TRACE_ENTRY();
2696
2697         device_count = atomic_read(&srpt_device_count);
2698
2699         TRACE_EXIT_RES(device_count);
2700
2701         return device_count;
2702 }
2703
2704 /*
2705  * Callback function called by the SCST core from scst_unregister() to free up
2706  * the resources associated with device scst_tgt.
2707  */
2708 static int srpt_release(struct scst_tgt *scst_tgt)
2709 {
2710         struct srpt_device *sdev = scst_tgt_get_tgt_priv(scst_tgt);
2711         struct srpt_rdma_ch *ch, *tmp_ch;
2712
2713         TRACE_ENTRY();
2714
2715         BUG_ON(!scst_tgt);
2716 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
2717         WARN_ON(!sdev);
2718         if (!sdev)
2719                 return -ENODEV;
2720 #else
2721         if (WARN_ON(!sdev))
2722                 return -ENODEV;
2723 #endif
2724
2725 #ifdef CONFIG_SCST_PROC
2726         srpt_unregister_procfs_entry(scst_tgt->tgtt);
2727 #endif /*CONFIG_SCST_PROC*/
2728
2729         spin_lock_irq(&sdev->spinlock);
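        /*
         * The lock is dropped around the DREQ and session teardown calls
         * because scst_unregister_session() may sleep, which is not allowed
         * while holding a spinlock; each channel is list_del()'d first, so
         * the _safe iterator can continue once the lock is re-acquired.
         */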
2730         list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
2731                 list_del(&ch->list);
2732                 atomic_set(&ch->state, RDMA_CHANNEL_DISCONNECTING);
2733                 spin_unlock_irq(&sdev->spinlock);
2734                 ib_send_cm_dreq(ch->cm_id, NULL, 0);
2735                 scst_unregister_session(ch->scst_sess, true,
2736                                         srpt_release_channel);
2737                 spin_lock_irq(&sdev->spinlock);
2738         }
2739         spin_unlock_irq(&sdev->spinlock);
2740
2741         srpt_unregister_mad_agent(sdev);
2742
2743         scst_tgt_set_tgt_priv(scst_tgt, NULL);
2744
2745         TRACE_EXIT();
2746
2747         return 0;
2748 }
2749
2750 /*
2751  * Entry point for ib_srpt's kernel thread. This kernel thread is only created
2752  * when the module parameter 'thread' is not zero (the default is zero).
2753  * This thread processes the ioctx list srpt_thread.thread_ioctx_list.
2754  *
2755  * @pre thread != 0
2756  */
2757 static int srpt_ioctx_thread(void *arg)
2758 {
2759         struct srpt_ioctx *ioctx;
2760
2761         /* Hibernation / freezing of the SRPT kernel thread is not supported. */
2762         current->flags |= PF_NOFREEZE;
2763
2764         spin_lock_irq(&srpt_thread.thread_lock);
2765         while (!kthread_should_stop()) {
2766                 wait_queue_t wait;
2767                 init_waitqueue_entry(&wait, current);
2768
2769                 if (!srpt_test_ioctx_list()) {
2770                         add_wait_queue_exclusive(&ioctx_list_waitQ, &wait);
2771
2772                         for (;;) {
2773                                 set_current_state(TASK_INTERRUPTIBLE);
2774                                 if (srpt_test_ioctx_list())
2775                                         break;
2776                                 spin_unlock_irq(&srpt_thread.thread_lock);
2777                                 schedule();
2778                                 spin_lock_irq(&srpt_thread.thread_lock);
2779                         }
2780                         set_current_state(TASK_RUNNING);
2781                         remove_wait_queue(&ioctx_list_waitQ, &wait);
2782                 }
2783
2784                 while (!list_empty(&srpt_thread.thread_ioctx_list)) {
2785                         ioctx = list_entry(srpt_thread.thread_ioctx_list.next,
2786                                            struct srpt_ioctx, comp_list);
2787
2788                         list_del(&ioctx->comp_list);
2789
2790                         spin_unlock_irq(&srpt_thread.thread_lock);
2791                         switch (ioctx->op) {
2792                         case IB_WC_SEND:
2793                                 srpt_handle_send_comp(ioctx->ch, ioctx,
2794                                         SCST_CONTEXT_DIRECT);
2795                                 break;
2796                         case IB_WC_RDMA_WRITE:
2797                         case IB_WC_RDMA_READ:
2798                                 srpt_handle_rdma_comp(ioctx->ch, ioctx);
2799                                 break;
2800                         case IB_WC_RECV:
2801                                 srpt_handle_new_iu(ioctx->ch, ioctx);
2802                                 break;
2803                         default:
2804                                 PRINT_ERROR("received unrecognized WC opcode"
2805                                             " %d", ioctx->op);
2806                                 break;
2807                         }
2808 #if defined(CONFIG_SCST_DEBUG)
2809                         if (thread_processing_delay_in_us
2810                             <= MAX_UDELAY_MS * 1000)
2811                                 udelay(thread_processing_delay_in_us);
2812 #endif
2813                         spin_lock_irq(&srpt_thread.thread_lock);
2814                 }
2815         }
2816         spin_unlock_irq(&srpt_thread.thread_lock);
2817
2818         return 0;
2819 }
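/*
 * For reference, a minimal sketch (not the driver's code) of the same
 * wait-for-work loop expressed with wait_event_interruptible(); the
 * open-coded loop above is used instead so that the ioctx list can be
 * tested and drained while srpt_thread.thread_lock is held.
 */
static int srpt_ioctx_thread_sketch(void *arg)
{
        while (!kthread_should_stop()) {
                wait_event_interruptible(ioctx_list_waitQ,
                                         srpt_test_ioctx_list());
                /* Drain srpt_thread.thread_ioctx_list as above. */
        }
        return 0;
}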
2820
2821 /* SCST target template for the SRP target implementation. */
2822 static struct scst_tgt_template srpt_template = {
2823         .name = DRV_NAME,
2824         .sg_tablesize = SRPT_DEF_SG_TABLESIZE,
2825         .xmit_response_atomic = 1,
2826         .rdy_to_xfer_atomic = 1,
2827         .detect = srpt_detect,
2828         .release = srpt_release,
2829         .xmit_response = srpt_xmit_response,
2830         .rdy_to_xfer = srpt_rdy_to_xfer,
2831         .on_free_cmd = srpt_on_free_cmd,
2832         .task_mgmt_fn_done = srpt_tsk_mgmt_done
2833 };
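/*
 * Note: the *_atomic flags above declare that xmit_response() and
 * rdy_to_xfer() may be invoked by the SCST core in atomic context, which is
 * why the SG-mapping code earlier falls back to GFP_ATOMIC allocations
 * whenever scst_cmd_atomic() is true.
 */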
2834
2835 /*
2836  * The callback function srpt_release_class_dev() is called whenever a
2837  * device is removed from the /sys/class/infiniband_srpt device class.
2838  * The function is intentionally empty; a release function is defined only
2839  * so that the driver core does not complain about a missing release
2840  * function when the module is removed.
2841  */
2842 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2843 static void srpt_release_class_dev(struct class_device *class_dev)
2844 #else
2845 static void srpt_release_class_dev(struct device *dev)
2846 #endif
2847 {
2848 }
2849
2850 #ifdef CONFIG_SCST_PROC
2851
2852 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
2853 static int srpt_trace_level_show(struct seq_file *seq, void *v)
2854 {
2855         return scst_proc_log_entry_read(seq, trace_flag, NULL);
2856 }
2857
2858 static ssize_t srpt_proc_trace_level_write(struct file *file,
2859         const char __user *buf, size_t length, loff_t *off)
2860 {
2861         return scst_proc_log_entry_write(file, buf, length, &trace_flag,
2862                 DEFAULT_SRPT_TRACE_FLAGS, NULL);
2863 }
2864
2865 static struct scst_proc_data srpt_log_proc_data = {
2866         SCST_DEF_RW_SEQ_OP(srpt_proc_trace_level_write)
2867         .show = srpt_trace_level_show,
2868 };
2869 #endif
2870
2871 #endif /* CONFIG_SCST_PROC */
2872
2873 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2874 static ssize_t show_login_info(struct class_device *class_dev, char *buf)
2875 #else
2876 static ssize_t show_login_info(struct device *dev,
2877                                struct device_attribute *attr, char *buf)
2878 #endif
2879 {
2880         struct srpt_device *sdev =
2881 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2882                 container_of(class_dev, struct srpt_device, class_dev);
2883 #else
2884                 container_of(dev, struct srpt_device, dev);
2885 #endif
2886         struct srpt_port *sport;
2887         int i;
2888         int len = 0;
2889
2890         for (i = 0; i < sdev->device->phys_port_cnt; i++) {
2891                 sport = &sdev->port[i];
2892
2893                 len += sprintf(buf + len,
2894                                "tid_ext=%016llx,ioc_guid=%016llx,pkey=ffff,"
2895                                "dgid=%04x%04x%04x%04x%04x%04x%04x%04x,"
2896                                "service_id=%016llx\n",
2897                                (unsigned long long) srpt_service_guid,
2898                                (unsigned long long) srpt_service_guid,
2899                                be16_to_cpu(((__be16 *) sport->gid.raw)[0]),
2900                                be16_to_cpu(((__be16 *) sport->gid.raw)[1]),
2901                                be16_to_cpu(((__be16 *) sport->gid.raw)[2]),
2902                                be16_to_cpu(((__be16 *) sport->gid.raw)[3]),
2903                                be16_to_cpu(((__be16 *) sport->gid.raw)[4]),
2904                                be16_to_cpu(((__be16 *) sport->gid.raw)[5]),
2905                                be16_to_cpu(((__be16 *) sport->gid.raw)[6]),
2906                                be16_to_cpu(((__be16 *) sport->gid.raw)[7]),
2907                                (unsigned long long) srpt_service_guid);
2908         }
2909
2910         return len;
2911 }
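/*
 * The login_info attribute produced by show_login_info() is exported through
 * sysfs, one line per port. Assuming the device naming set up in
 * srpt_add_one() below, it can be read as follows; the output matches the
 * parameter format used by SRP initiators when logging in:
 *
 *   cat /sys/class/infiniband_srpt/srpt-<hca name>/login_info
 */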
2912
2913 static struct class_attribute srpt_class_attrs[] = {
2914         __ATTR_NULL,
2915 };
2916
2917 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2918 static struct class_device_attribute srpt_dev_attrs[] = {
2919 #else
2920 static struct device_attribute srpt_dev_attrs[] = {
2921 #endif
2922         __ATTR(login_info, S_IRUGO, show_login_info, NULL),
2923         __ATTR_NULL,
2924 };
2925
2926 static struct class srpt_class = {
2927         .name        = "infiniband_srpt",
2928 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2929         .release = srpt_release_class_dev,
2930 #else
2931         .dev_release = srpt_release_class_dev,
2932 #endif
2933         .class_attrs = srpt_class_attrs,
2934 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2935         .class_dev_attrs = srpt_dev_attrs,
2936 #else
2937         .dev_attrs   = srpt_dev_attrs,
2938 #endif
2939 };
2940
2941 /*
2942  * Callback function invoked by the InfiniBand core when an InfiniBand
2943  * device is added, and also once for each already registered InfiniBand
2944  * device during the ib_register_client() call.
2945  */
2946 static void srpt_add_one(struct ib_device *device)
2947 {
2948         struct srpt_device *sdev;
2949         struct srpt_port *sport;
2950         struct ib_srq_init_attr srq_attr;
2951         int i;
2952
2953         TRACE_ENTRY();
2954
2955         TRACE_DBG("device = %p, device->dma_ops = %p", device, device->dma_ops);
2956
2957         sdev = kzalloc(sizeof *sdev, GFP_KERNEL);
2958         if (!sdev)
2959                 return;
2960
2961         sdev->device = device;
2962
2963 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2964         sdev->class_dev.class = &srpt_class;
2965         sdev->class_dev.dev = device->dma_device;
2966         snprintf(sdev->class_dev.class_id, BUS_ID_SIZE,
2967                  "srpt-%s", device->name);
2968 #else
2969         sdev->dev.class = &srpt_class;
2970         sdev->dev.parent = device->dma_device;
2971 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30)
2972         snprintf(sdev->dev.bus_id, BUS_ID_SIZE, "srpt-%s", device->name);
2973 #else
2974         dev_set_name(&sdev->dev, "srpt-%s", device->name);
2975 #endif
2976 #endif
2977
2978 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2979         if (class_device_register(&sdev->class_dev))
2980                 goto free_dev;
2981 #else
2982         if (device_register(&sdev->dev))
2983                 goto free_dev;
2984 #endif
2985
2986         if (ib_query_device(device, &sdev->dev_attr))
2987                 goto err_dev;
2988
2989         sdev->pd = ib_alloc_pd(device);
2990         if (IS_ERR(sdev->pd))
2991                 goto err_dev;
2992
2993         sdev->mr = ib_get_dma_mr(sdev->pd, IB_ACCESS_LOCAL_WRITE);
2994         if (IS_ERR(sdev->mr))
2995                 goto err_pd;
2996
2997         srq_attr.event_handler = srpt_srq_event;
2998         srq_attr.srq_context = sdev;
2999         srq_attr.attr.max_wr = min(SRPT_SRQ_SIZE, sdev->dev_attr.max_srq_wr);
3000         srq_attr.attr.max_sge = 1;
3001         srq_attr.attr.srq_limit = 0;
3002
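        /*
         * Use a single shared receive queue (SRQ) for all connections to
         * this HCA, and cap its size at the limit reported by
         * ib_query_device().
         */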
3003         sdev->srq = ib_create_srq(sdev->pd, &srq_attr);
3004         if (IS_ERR(sdev->srq))
3005                 goto err_mr;
3006
3007         TRACE_DBG("%s: created SRQ with %d work requests (max supported:"
3008                   " %d) on device %s", __func__, srq_attr.attr.max_wr,
3009                   sdev->dev_attr.max_srq_wr, device->name);
3010
3011         if (!srpt_service_guid)
3012                 srpt_service_guid = be64_to_cpu(device->node_guid);
3013
3014         sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev);
3015         if (IS_ERR(sdev->cm_id))
3016                 goto err_srq;
3017
3018         /* Print the target login information. */
3019         TRACE_DBG("Target login info: id_ext=%016llx,"
3020                   "ioc_guid=%016llx,pkey=ffff,service_id=%016llx",
3021                   (unsigned long long) srpt_service_guid,
3022                   (unsigned long long) srpt_service_guid,
3023                   (unsigned long long) srpt_service_guid);
3024
3025         /*
3026          * We do not have a consistent service_id (i.e. the id_ext of the
3027          * target_id) to identify this target. We currently use the GUID
3028          * of the first HCA in the system as the service_id; hence the
3029          * target_id changes if this HCA goes bad and is replaced.
3030          */
3031         if (ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid), 0, NULL))
3032                 goto err_cm;
3033
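        /*
         * Register a handler for asynchronous InfiniBand events (e.g. port
         * state changes) so that the ports can be refreshed when the fabric
         * configuration changes.
         */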
3034         INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device,
3035                               srpt_event_handler);
3036         if (ib_register_event_handler(&sdev->event_handler))
3037                 goto err_cm;
3038
3039         if (srpt_alloc_ioctx_ring(sdev))
3040                 goto err_event;
3041
3042         INIT_LIST_HEAD(&sdev->rch_list);
3043         spin_lock_init(&sdev->spinlock);
3044
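        /*
         * Pre-post one receive work request per SRQ entry so that incoming
         * SRP information units can be received before any RDMA channel has
         * been established.
         */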
3045         for (i = 0; i < SRPT_SRQ_SIZE; ++i)
3046                 srpt_post_recv(sdev, sdev->ioctx_ring[i]);
3047
3048         ib_set_client_data(device, &srpt_client, sdev);
3049
3050         sdev->scst_tgt = scst_register(&srpt_template, NULL);
3051         if (!sdev->scst_tgt) {
3052                 PRINT_ERROR("SCST registration failed for %s.",
3053                             sdev->device->name);
3054                 goto err_ring;
3055         }
3056
3057         scst_tgt_set_tgt_priv(sdev->scst_tgt, sdev);
3058
3059         WARN_ON(sdev->device->phys_port_cnt
3060                 > ARRAY_SIZE(sdev->port));
3061
3062         for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
3063                 sport = &sdev->port[i - 1];
3064                 sport->sdev = sdev;
3065                 sport->port = i;
3066 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && !defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
3067                 /*
3068                  * A vanilla 2.6.19 or older kernel without backported OFED
3069                  * kernel headers.
3070                  */
3071                 INIT_WORK(&sport->work, srpt_refresh_port_work, sport);
3072 #else
3073                 INIT_WORK(&sport->work, srpt_refresh_port_work);
3074 #endif
3075                 if (srpt_refresh_port(sport)) {
3076                         PRINT_ERROR("MAD registration failed for %s-%d.",
3077                                     sdev->device->name, i);
3078                         goto err_refresh_port;
3079                 }
3080         }
3081
3082         atomic_inc(&srpt_device_count);
3083
3084         TRACE_EXIT();
3085
3086         return;
3087
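/*
 * Error handling: unwind in the reverse order of the initialization
 * sequence above, starting from the step that failed.
 */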
3088 err_refresh_port:
3089         scst_unregister(sdev->scst_tgt);
3090 err_ring:
3091         ib_set_client_data(device, &srpt_client, NULL);
3092         srpt_free_ioctx_ring(sdev);
3093 err_event:
3094         ib_unregister_event_handler(&sdev->event_handler);
3095 err_cm:
3096         ib_destroy_cm_id(sdev->cm_id);
3097 err_srq:
3098         ib_destroy_srq(sdev->srq);
3099 err_mr:
3100         ib_dereg_mr(sdev->mr);
3101 err_pd:
3102         ib_dealloc_pd(sdev->pd);
3103 err_dev:
3104 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
3105         class_device_unregister(&sdev->class_dev);
3106 #else
3107         device_unregister(&sdev->dev);
3108 #endif
3109 free_dev:
3110         kfree(sdev);
3111
3112         TRACE_EXIT();
3113 }
3114
3115 /*
3116  * Callback function invoked by the InfiniBand core when an InfiniBand
3117  * device is removed, and also once for each registered InfiniBand
3118  * device during the ib_unregister_client() call.
3119  */
3120 static void srpt_remove_one(struct ib_device *device)
3121 {
3122         int i;
3123         struct srpt_device *sdev;
3124
3125         TRACE_ENTRY();
3126
3127         sdev = ib_get_client_data(device, &srpt_client);
3128 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
3129         WARN_ON(!sdev);
3130         if (!sdev)
3131                 return;
3132 #else
3133         if (WARN_ON(!sdev))
3134                 return;
3135 #endif
3136
3137         /*
3138          * Cancel the port refresh work if it is queued, and wait until
3139          * srpt_refresh_port_work() has finished if it is running.
3140          */
3141         for (i = 0; i < sdev->device->phys_port_cnt; i++)
3142 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 22)
3143                 cancel_work_sync(&sdev->port[i].work);
3144 #else
3145                 /*
3146                  * cancel_work_sync() was introduced in kernel 2.6.22. Older
3147                  * kernels do not have a facility to cancel scheduled work.
3148                  */
3149                 PRINT_ERROR("%s",
3150                        "your kernel does not provide cancel_work_sync().");
3151 #endif
3152
3153         scst_unregister(sdev->scst_tgt);
3154         sdev->scst_tgt = NULL;
3155
3156         ib_unregister_event_handler(&sdev->event_handler);
3157         ib_destroy_cm_id(sdev->cm_id);
3158         ib_destroy_srq(sdev->srq);
3159         ib_dereg_mr(sdev->mr);
3160         ib_dealloc_pd(sdev->pd);
3161 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
3162         class_device_unregister(&sdev->class_dev);
3163 #else
3164         device_unregister(&sdev->dev);
3165 #endif
3166
3167         srpt_free_ioctx_ring(sdev);
3168         kfree(sdev);
3169
3170         TRACE_EXIT();
3171 }
3172
3173 #ifdef CONFIG_SCST_PROC
3174
3175 /**
3176  * Create procfs entries for srpt. Currently the only procfs entry created
3177  * by this function is the "trace_level" entry.
3178  */
3179 static int srpt_register_procfs_entry(struct scst_tgt_template *tgt)
3180 {
3181         int res = 0;
3182 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
3183         struct proc_dir_entry *p, *root;
3184
3185         root = scst_proc_get_tgt_root(tgt);
3186         WARN_ON(!root);
3187         if (root) {
3188                 /*
3189                  * Fill in the scst_proc_data::data pointer, which is used in
3190                  * a printk(KERN_INFO ...) statement in
3191                  * scst_proc_log_entry_write() in scst_proc.c.
3192                  */
3193                 srpt_log_proc_data.data = (char *)tgt->name;
3194                 p = scst_create_proc_entry(root, SRPT_PROC_TRACE_LEVEL_NAME,
3195                                            &srpt_log_proc_data);
3196                 if (!p)
3197                         res = -ENOMEM;
3198         } else
3199                 res = -ENOMEM;
3200
3201 #endif
3202         return res;
3203 }
3204
3205 static void srpt_unregister_procfs_entry(struct scst_tgt_template *tgt)
3206 {
3207 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
3208         struct proc_dir_entry *root;
3209
3210         root = scst_proc_get_tgt_root(tgt);
3211         WARN_ON(!root);
3212         if (root)
3213                 remove_proc_entry(SRPT_PROC_TRACE_LEVEL_NAME, root);
3214 #endif
3215 }
3216
3217 #endif /*CONFIG_SCST_PROC*/
3218
3219 /*
3220  * Module initialization.
3221  *
3222  * Note: since ib_register_client() registers callback functions, and since at
3223  * least one of these callback functions (srpt_add_one()) calls SCST functions,
3224  * the SCST target template must be registered before ib_register_client() is
3225  * called.
3226  */
3227 static int __init srpt_init_module(void)
3228 {
3229         int ret;
3230
3231         ret = -EINVAL;
3232         if (srp_max_message_size < MIN_MAX_MESSAGE_SIZE) {
3233                 PRINT_ERROR("invalid value %d for kernel module parameter"
3234                             " srp_max_message_size -- must be at least %d.",
3235                             srp_max_message_size,
3236                             MIN_MAX_MESSAGE_SIZE);
3237                 goto out;
3238         }
3239
3240         ret = class_register(&srpt_class);
3241         if (ret) {
3242                 PRINT_ERROR("%s", "couldn't register class ib_srpt");
3243                 goto out;
3244         }
3245
3246         ret = scst_register_target_template(&srpt_template);
3247         if (ret < 0) {
3248                 PRINT_ERROR("%s", "couldn't register with scst");
3249                 ret = -ENODEV;
3250                 goto out_unregister_class;
3251         }
3252
3253 #ifdef CONFIG_SCST_PROC
3254         ret = srpt_register_procfs_entry(&srpt_template);
3255         if (ret) {
3256                 PRINT_ERROR("%s", "couldn't register procfs entry");
3257                 goto out_unregister_target;
3258         }
3259 #endif /*CONFIG_SCST_PROC*/
3260
3261         ret = ib_register_client(&srpt_client);
3262         if (ret) {
3263                 PRINT_ERROR("%s", "couldn't register IB client");
3264                 goto out_unregister_target;
3265         }
3266
3267         if (thread) {
3268                 spin_lock_init(&srpt_thread.thread_lock);
3269                 INIT_LIST_HEAD(&srpt_thread.thread_ioctx_list);
3270                 srpt_thread.thread = kthread_run(srpt_ioctx_thread,
3271                                                  NULL, "srpt_thread");
3272                 if (IS_ERR(srpt_thread.thread)) {
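                        /*
                         * Thread creation failed: clear the thread flag and
                         * fall back to processing completions directly in
                         * the completion handler context.
                         */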
3273                         srpt_thread.thread = NULL;
3274                         thread = 0;
3275                 }
3276         }
3277
3278         return 0;
3279
3280 out_unregister_target:
3281 #ifdef CONFIG_SCST_PROC
3282         /*
3283          * Note: the procfs entry is unregistered in srpt_release(), which is
3284          * called by scst_unregister_target_template().
3285          */
3286 #endif /*CONFIG_SCST_PROC*/
3287         scst_unregister_target_template(&srpt_template);
3288 out_unregister_class:
3289         class_unregister(&srpt_class);
3290 out:
3291         return ret;
3292 }
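/*
 * Example of loading this module, assuming that srp_max_message_size and
 * thread (both used above) are registered as module parameters earlier in
 * this file:
 *
 *   modprobe ib_srpt srp_max_message_size=4096 thread=1
 */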
3293
3294 static void __exit srpt_cleanup_module(void)
3295 {
3296         TRACE_ENTRY();
3297
3298         if (srpt_thread.thread)
3299                 kthread_stop(srpt_thread.thread);
3300         ib_unregister_client(&srpt_client);
3301         scst_unregister_target_template(&srpt_template);
3302         class_unregister(&srpt_class);
3303
3304         TRACE_EXIT();
3305 }
3306
3307 module_init(srpt_init_module);
3308 module_exit(srpt_cleanup_module);