Should have been included in the previous commit (r988).
1 /*
2  * Copyright (c) 2006 - 2009 Mellanox Technology Inc.  All rights reserved.
3  * Copyright (C) 2008 Vladislav Bolkhovitin <vst@vlnb.net>
4  * Copyright (C) 2008 - 2009 Bart Van Assche <bart.vanassche@gmail.com>
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  *
34  */
35
36 #include <linux/module.h>
37 #include <linux/init.h>
38 #include <linux/slab.h>
39 #include <linux/err.h>
40 #include <linux/ctype.h>
41 #include <linux/string.h>
42 #include <linux/kthread.h>
43
44 #include <asm/atomic.h>
45
46 #include "ib_srpt.h"
47 #include "scst_debug.h"
48
49 /* Name of this kernel module. */
50 #define DRV_NAME                "ib_srpt"
51 /* Prefix for printk() kernel messages. */
52 #define PFX                     DRV_NAME ": "
53 #define DRV_VERSION             "1.0.1"
54 #define DRV_RELDATE             "July 10, 2008"
55 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
56 /* Flags to be used in SCST debug tracing statements. */
57 #define DEFAULT_SRPT_TRACE_FLAGS (TRACE_OUT_OF_MEM | TRACE_MINOR \
58                                   | TRACE_MGMT | TRACE_SPECIAL)
59 #endif
60
61 #define MELLANOX_SRPT_ID_STRING "Mellanox OFED SRP target"
62
63 MODULE_AUTHOR("Vu Pham");
64 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol target "
65                    "v" DRV_VERSION " (" DRV_RELDATE ")");
66 MODULE_LICENSE("Dual BSD/GPL");
67
68 struct srpt_thread {
69         /* Protects thread_ioctx_list. */
70         spinlock_t thread_lock;
71         /* I/O contexts to be processed by the kernel thread. */
72         struct list_head thread_ioctx_list;
73         /* SRPT kernel thread. */
74         struct task_struct *thread;
75 };
76
77 /*
78  * Global Variables
79  */
80
81 static u64 mellanox_ioc_guid;
82 /* List of srpt_device structures. */
83 static struct list_head srpt_devices;
84 static int thread;
85 static struct srpt_thread srpt_thread;
86 static DECLARE_WAIT_QUEUE_HEAD(ioctx_list_waitQ);
87 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
88 static unsigned long trace_flag = DEFAULT_SRPT_TRACE_FLAGS;
89 module_param(trace_flag, long, 0644);
90 MODULE_PARM_DESC(trace_flag,
91                  "Trace flags for the ib_srpt kernel module.");
92 #endif
93
94 module_param(thread, int, 0444);
95 MODULE_PARM_DESC(thread,
96                  "Execute ioctx in thread context. Default is 0, i.e. process "
97                  "ioctx in soft IRQ context where possible.");
98
99 static void srpt_add_one(struct ib_device *device);
100 static void srpt_remove_one(struct ib_device *device);
101 static int srpt_disconnect_channel(struct srpt_rdma_ch *ch, int dreq);
102 static void srpt_unregister_mad_agent(struct srpt_device *sdev);
103
104 static struct ib_client srpt_client = {
105         .name = DRV_NAME,
106         .add = srpt_add_one,
107         .remove = srpt_remove_one
108 };
109
110 /*
111  * Callback function called by the InfiniBand core when an asynchronous IB
112  * event occurs. This callback may occur in interrupt context. See also
113  * section 11.5.2, Set Asynchronous Event Handler in the InfiniBand
114  * Architecture Specification.
115  */
116 static void srpt_event_handler(struct ib_event_handler *handler,
117                                struct ib_event *event)
118 {
119         struct srpt_device *sdev =
120             ib_get_client_data(event->device, &srpt_client);
121         struct srpt_port *sport;
122
123         if (!sdev || sdev->device != event->device)
124                 return;
125
126         printk(KERN_WARNING PFX "ASYNC event= %d on device= %s\n",
127                 event->event, sdev->device->name);
128
129         switch (event->event) {
130         case IB_EVENT_PORT_ERR:
131                 if (event->element.port_num <= sdev->device->phys_port_cnt) {
132                         sport = &sdev->port[event->element.port_num - 1];
133                         sport->lid = 0;
134                         sport->sm_lid = 0;
135                 }
136                 break;
137         case IB_EVENT_PORT_ACTIVE:
138         case IB_EVENT_LID_CHANGE:
139         case IB_EVENT_PKEY_CHANGE:
140         case IB_EVENT_SM_CHANGE:
141         case IB_EVENT_CLIENT_REREGISTER:
142                 /*
143                  * Refresh port data asynchronously. Note: it is safe to call
144                  * schedule_work() even if &sport->work is already on the
145                  * global workqueue because schedule_work() tests for the
146                  * work_pending() condition before adding &sport->work to the
147                  * global work queue.
148                  */
149                 if (event->element.port_num <= sdev->device->phys_port_cnt) {
150                         sport = &sdev->port[event->element.port_num - 1];
151                         if (!sport->lid && !sport->sm_lid)
152                                 schedule_work(&sport->work);
153                 }
154                 break;
155         default:
156                 break;
157         }
158
159 }
160
161 /*
162  * Callback function called by the InfiniBand core for SRQ (shared receive
163  * queue) events.
164  */
165 static void srpt_srq_event(struct ib_event *event, void *ctx)
166 {
167         printk(KERN_WARNING PFX "SRQ event %d\n", event->event);
168 }
169
170 /*
171  * Callback function called by the InfiniBand core for QP (queue pair) events.
172  */
173 static void srpt_qp_event(struct ib_event *event, void *ctx)
174 {
175         struct srpt_rdma_ch *ch = ctx;
176
177         printk(KERN_WARNING PFX
178                "QP event %d on cm_id=%p sess_name=%s state=%d\n",
179                event->event, ch->cm_id, ch->sess_name, ch->state);
180
181         switch (event->event) {
182         case IB_EVENT_COMM_EST:
183 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20) || defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
184                 ib_cm_notify(ch->cm_id, event->event);
185 #else
186                 /* Vanilla 2.6.19 kernel (or before) without OFED. */
187                 printk(KERN_ERR PFX "ib_cm_notify() is not available on"
188                         " vanilla 2.6.19 and older kernels\n");
189 #endif
190                 break;
191         case IB_EVENT_QP_LAST_WQE_REACHED:
192                 if (ch->state == RDMA_CHANNEL_LIVE) {
193                         printk(KERN_WARNING PFX
194                                "Schedule CM_DISCONNECT_WORK\n");
195                         srpt_disconnect_channel(ch, 1);
196                 }
197                 break;
198         default:
199                 break;
200         }
201 }
202
203 /*
204  * Helper function for filling in an InfiniBand IOUnitInfo structure: copies
205  * the lowest four bits of 'value' into element 'slot' of the array of
206  * four-bit elements 'c_list' (controller list). The index 'slot' is one-based.
207  *
208  * @pre 1 <= slot && 0 <= value && value < 16
209  */
210 static void srpt_set_ioc(u8 *c_list, u32 slot, u8 value)
211 {
212         u16 id;
213         u8 tmp;
214
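        /* Each byte of c_list holds two controller entries: odd-numbered
         * slots occupy the high nibble, even-numbered slots the low nibble. */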
215         id = (slot - 1) / 2;
216         if (slot & 0x1) {
217                 tmp = c_list[id] & 0xf;
218                 c_list[id] = (value << 4) | tmp;
219         } else {
220                 tmp = c_list[id] & 0xf0;
221                 c_list[id] = (value & 0xf) | tmp;
222         }
223 }
224
225 /*
226  * Write InfiniBand ClassPortInfo to mad. See also section 16.3.3.1
227  * ClassPortInfo in the InfiniBand Architecture Specification.
228  */
229 static void srpt_get_class_port_info(struct ib_dm_mad *mad)
230 {
231         struct ib_class_port_info *cif;
232
233         cif = (struct ib_class_port_info *)mad->data;
234         memset(cif, 0, sizeof *cif);
235         cif->base_version = 1;
236         cif->class_version = 1;
237         cif->resp_time_value = 20;
238
239         mad->mad_hdr.status = 0;
240 }
241
242 /*
243  * Write IOUnitInfo to mad. See also section 16.3.3.3 IOUnitInfo in the
244  * InfiniBand Architecture Specification. See also section B.7,
245  * table B.6 in the T10 SRP r16a document.
246  */
247 static void srpt_get_iou(struct ib_dm_mad *mad)
248 {
249         struct ib_dm_iou_info *ioui;
250         u8 slot;
251         int i;
252
253         ioui = (struct ib_dm_iou_info *)mad->data;
254         ioui->change_id = 1;
255         ioui->max_controllers = 16;
256
257         /* set present for slot 1 and empty for the rest */
258         srpt_set_ioc(ioui->controller_list, 1, 1);
259         for (i = 1, slot = 2; i < 16; i++, slot++)
260                 srpt_set_ioc(ioui->controller_list, slot, 0);
261
262         mad->mad_hdr.status = 0;
263 }
264
265 /*
266  * Write IOControllerProfile to mad for I/O controller (sdev, slot). See also
267  * section 16.3.3.4 IOControllerProfile in the InfiniBand Architecture
268  * Specification. See also section B.7, table B.7 in the T10 SRP r16a
269  * document.
270  */
271 static void srpt_get_ioc(struct srpt_device *sdev, u32 slot,
272                          struct ib_dm_mad *mad)
273 {
274         struct ib_dm_ioc_profile *iocp;
275
276         iocp = (struct ib_dm_ioc_profile *)mad->data;
277
278         if (!slot || slot > 16) {
279                 mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
280                 return;
281         }
282
283         if (slot > 2) {
284                 mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
285                 return;
286         }
287
288         memset(iocp, 0, sizeof *iocp);
289         strcpy(iocp->id_string, MELLANOX_SRPT_ID_STRING);
290         iocp->guid = cpu_to_be64(mellanox_ioc_guid);
291         iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
292         iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id);
293         iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver);
294         iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
295         iocp->subsys_device_id = 0x0;
296         iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
297         iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
298         iocp->protocol = cpu_to_be16(SRP_PROTOCOL);
299         iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION);
300         iocp->send_queue_depth = cpu_to_be16(SRPT_SRQ_SIZE);
301         iocp->rdma_read_depth = 4;
302         iocp->send_size = cpu_to_be32(MAX_MESSAGE_SIZE);
303         iocp->rdma_size = cpu_to_be32(MAX_RDMA_SIZE);
304         iocp->num_svc_entries = 1;
305         iocp->op_cap_mask = SRP_SEND_TO_IOC | SRP_SEND_FROM_IOC |
306             SRP_RDMA_READ_FROM_IOC | SRP_RDMA_WRITE_FROM_IOC;
307
308         mad->mad_hdr.status = 0;
309 }
310
311 /*
312  * Device management: write ServiceEntries to mad for the given slot. See also
313  * section 16.3.3.5 ServiceEntries in the InfiniBand Architecture
314  * Specification. See also section B.7, table B.8 in the T10 SRP r16a document.
315  */
316 static void srpt_get_svc_entries(u16 slot, u8 hi, u8 lo, struct ib_dm_mad *mad)
317 {
318         struct ib_dm_svc_entries *svc_entries;
319
320         if (!slot || slot > 16) {
321                 mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
322                 return;
323         }
324
325         if (slot > 2 || lo > hi || hi > 1) {
326                 mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
327                 return;
328         }
329
330         svc_entries = (struct ib_dm_svc_entries *)mad->data;
331         memset(svc_entries, 0, sizeof *svc_entries);
332         svc_entries->service_entries[0].id = cpu_to_be64(mellanox_ioc_guid);
333         sprintf(svc_entries->service_entries[0].name, "%s%016llx",
334                 SRP_SERVICE_NAME_PREFIX, (unsigned long long)mellanox_ioc_guid);
335
336         mad->mad_hdr.status = 0;
337 }
338
339 /*
340  * Process the MAD *rq_mad that was received through source port *sp
341  * (MAD = InfiniBand management datagram). The response to be sent back is
342  * written to *rsp_mad.
343  */
344 static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
345                                  struct ib_dm_mad *rsp_mad)
346 {
347         u16 attr_id;
348         u32 slot;
349         u8 hi, lo;
350
351         attr_id = be16_to_cpu(rq_mad->mad_hdr.attr_id);
352         switch (attr_id) {
353         case DM_ATTR_CLASS_PORT_INFO:
354                 srpt_get_class_port_info(rsp_mad);
355                 break;
356         case DM_ATTR_IOU_INFO:
357                 srpt_get_iou(rsp_mad);
358                 break;
359         case DM_ATTR_IOC_PROFILE:
360                 slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
361                 srpt_get_ioc(sp->sdev, slot, rsp_mad);
362                 break;
363         case DM_ATTR_SVC_ENTRIES:
364                 slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
365                 hi = (u8) ((slot >> 8) & 0xff);
366                 lo = (u8) (slot & 0xff);
367                 slot = (u16) ((slot >> 16) & 0xffff);
368                 srpt_get_svc_entries(slot, hi, lo, rsp_mad);
369                 break;
370         default:
371                 rsp_mad->mad_hdr.status =
372                     cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
373                 break;
374         }
375 }
376
377 /*
378  * Callback function that is called by the InfiniBand core after transmission of
379  * a MAD. (MAD = management datagram; AH = address handle.)
380  */
381 static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
382                                   struct ib_mad_send_wc *mad_wc)
383 {
384         ib_destroy_ah(mad_wc->send_buf->ah);
385         ib_free_send_mad(mad_wc->send_buf);
386 }
387
388 /*
389  * Callback function that is called by the InfiniBand core after reception of
390  * a MAD (management datagram).
391  */
392 static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
393                                   struct ib_mad_recv_wc *mad_wc)
394 {
395         struct srpt_port *sport = (struct srpt_port *)mad_agent->context;
396         struct ib_ah *ah;
397         struct ib_mad_send_buf *rsp;
398         struct ib_dm_mad *dm_mad;
399
400         if (!mad_wc || !mad_wc->recv_buf.mad)
401                 return;
402
403         ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc,
404                                   mad_wc->recv_buf.grh, mad_agent->port_num);
405         if (IS_ERR(ah))
406                 goto err;
407
408         BUILD_BUG_ON(offsetof(struct ib_dm_mad, data) != IB_MGMT_DEVICE_HDR);
409
410         rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp,
411                                  mad_wc->wc->pkey_index, 0,
412                                  IB_MGMT_DEVICE_HDR, IB_MGMT_DEVICE_DATA,
413                                  GFP_KERNEL);
414         if (IS_ERR(rsp))
415                 goto err_rsp;
416
417         rsp->ah = ah;
418
419         dm_mad = rsp->mad;
420         memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof *dm_mad);
421         dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
422         dm_mad->mad_hdr.status = 0;
423
424         switch (mad_wc->recv_buf.mad->mad_hdr.method) {
425         case IB_MGMT_METHOD_GET:
426                 srpt_mgmt_method_get(sport, mad_wc->recv_buf.mad, dm_mad);
427                 break;
428         case IB_MGMT_METHOD_SET:
429                 dm_mad->mad_hdr.status =
430                     cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
431                 break;
432         default:
433                 dm_mad->mad_hdr.status =
434                     cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD);
435                 break;
436         }
437
438         if (!ib_post_send_mad(rsp, NULL)) {
439                 ib_free_recv_mad(mad_wc);
440                 /* will destroy_ah & free_send_mad in send completion */
441                 return;
442         }
443
444         ib_free_send_mad(rsp);
445
446 err_rsp:
447         ib_destroy_ah(ah);
448 err:
449         ib_free_recv_mad(mad_wc);
450 }
451
452 /*
453  * Enable InfiniBand management datagram processing, update the cached sm_lid,
454  * lid and gid values, and register a callback function for processing MADs
455  * on the specified port. It is safe to call this function more than once for
456  * the same port.
457  */
458 static int srpt_refresh_port(struct srpt_port *sport)
459 {
460         struct ib_mad_reg_req reg_req;
461         struct ib_port_modify port_modify;
462         struct ib_port_attr port_attr;
463         int ret;
464
465         memset(&port_modify, 0, sizeof port_modify);
466         port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
467         port_modify.clr_port_cap_mask = 0;
468
469         ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
470         if (ret)
471                 goto err_mod_port;
472
473         ret = ib_query_port(sport->sdev->device, sport->port, &port_attr);
474         if (ret)
475                 goto err_query_port;
476
477         sport->sm_lid = port_attr.sm_lid;
478         sport->lid = port_attr.lid;
479
480         ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid);
481         if (ret)
482                 goto err_query_port;
483
484         if (!sport->mad_agent) {
485                 memset(&reg_req, 0, sizeof reg_req);
486                 reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
487                 reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION;
488                 set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
489                 set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);
490
491                 sport->mad_agent = ib_register_mad_agent(sport->sdev->device,
492                                                          sport->port,
493                                                          IB_QPT_GSI,
494                                                          &reg_req, 0,
495                                                          srpt_mad_send_handler,
496                                                          srpt_mad_recv_handler,
497                                                          sport);
498                 if (IS_ERR(sport->mad_agent)) {
499                         ret = PTR_ERR(sport->mad_agent);
500                         sport->mad_agent = NULL;
501                         goto err_query_port;
502                 }
503         }
504
505         return 0;
506
507 err_query_port:
508
509         port_modify.set_port_cap_mask = 0;
510         port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
511         ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
512
513 err_mod_port:
514
515         return ret;
516 }
517
518 /*
519  * Unregister the callback function for processing MADs and disable MAD
520  * processing for all ports of the specified device. It is safe to call this
521  * function more than once for the same device.
522  */
523 static void srpt_unregister_mad_agent(struct srpt_device *sdev)
524 {
525         struct ib_port_modify port_modify = {
526                 .clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP,
527         };
528         struct srpt_port *sport;
529         int i;
530
531         for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
532                 sport = &sdev->port[i - 1];
533                 WARN_ON(sport->port != i);
534                 if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0)
535                         printk(KERN_ERR PFX "disabling MAD processing"
536                                " failed.\n");
537                 if (sport->mad_agent) {
538                         ib_unregister_mad_agent(sport->mad_agent);
539                         sport->mad_agent = NULL;
540                 }
541         }
542 }
543
544 /*
545  * Allocate and initialize an SRPT I/O context structure.
546  */
547 static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev)
548 {
549         struct srpt_ioctx *ioctx;
550
551         ioctx = kmalloc(sizeof *ioctx, GFP_KERNEL);
552         if (!ioctx)
553                 goto out;
554
555         ioctx->buf = kzalloc(MAX_MESSAGE_SIZE, GFP_KERNEL);
556         if (!ioctx->buf)
557                 goto out_free_ioctx;
558
559         ioctx->dma = dma_map_single(sdev->device->dma_device, ioctx->buf,
560                                     MAX_MESSAGE_SIZE, DMA_BIDIRECTIONAL);
561 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)
562         if (dma_mapping_error(sdev->device->dma_device, ioctx->dma))
563 #else
564         if (dma_mapping_error(ioctx->dma))
565 #endif
566                 goto out_free_buf;
567
568         return ioctx;
569
570 out_free_buf:
571         kfree(ioctx->buf);
572 out_free_ioctx:
573         kfree(ioctx);
574 out:
575         return NULL;
576 }
577
578 /*
579  * Deallocate an SRPT I/O context structure.
580  */
581 static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
582 {
583         if (!ioctx)
584                 return;
585
586         dma_unmap_single(sdev->device->dma_device, ioctx->dma,
587                          MAX_MESSAGE_SIZE, DMA_BIDIRECTIONAL);
588         kfree(ioctx->buf);
589         kfree(ioctx);
590 }
591
592 /*
593  * Associate a ring of SRPT I/O context structures with the specified device.
594  */
595 static int srpt_alloc_ioctx_ring(struct srpt_device *sdev)
596 {
597         int i;
598
599         for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
600                 sdev->ioctx_ring[i] = srpt_alloc_ioctx(sdev);
601
602                 if (!sdev->ioctx_ring[i])
603                         goto err;
604
605                 sdev->ioctx_ring[i]->index = i;
606         }
607
608         return 0;
609
610 err:
611         while (--i >= 0) {
612                 srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
613                 sdev->ioctx_ring[i] = NULL;
614         }
615         return -ENOMEM;
616 }
617
618 /* Free the ring of SRPT I/O context structures. */
619 static void srpt_free_ioctx_ring(struct srpt_device *sdev)
620 {
621         int i;
622
623         for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
624                 srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
625                 sdev->ioctx_ring[i] = NULL;
626         }
627 }
628
629 /*
630  * Post a receive request on the shared receive queue (SRQ) of device 'sdev'.
631  */
632 static int srpt_post_recv(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
633 {
634         struct ib_sge list;
635         struct ib_recv_wr wr, *bad_wr;
636
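        /* Tag the work request id with SRPT_OP_RECV so that the completion
         * handler can tell receive completions apart from send completions. */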
637         wr.wr_id = ioctx->index | SRPT_OP_RECV;
638
639         list.addr = ioctx->dma;
640         list.length = MAX_MESSAGE_SIZE;
641         list.lkey = sdev->mr->lkey;
642
643         wr.next = NULL;
644         wr.sg_list = &list;
645         wr.num_sge = 1;
646
647         return ib_post_srq_recv(sdev->srq, &wr, &bad_wr);
648 }
649
650 /*
651  * Post a send request on the SRPT RDMA channel 'ch'.
652  */
653 static int srpt_post_send(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
654                           int len)
655 {
656         struct ib_sge list;
657         struct ib_send_wr wr, *bad_wr;
658         struct srpt_device *sdev = ch->sport->sdev;
659
660         dma_sync_single_for_device(sdev->device->dma_device, ioctx->dma,
661                                    MAX_MESSAGE_SIZE, DMA_TO_DEVICE);
662
663         list.addr = ioctx->dma;
664         list.length = len;
665         list.lkey = sdev->mr->lkey;
666
667         wr.next = NULL;
668         wr.wr_id = ioctx->index;
669         wr.sg_list = &list;
670         wr.num_sge = 1;
671         wr.opcode = IB_WR_SEND;
672         wr.send_flags = IB_SEND_SIGNALED;
673
674         return ib_post_send(ch->qp, &wr, &bad_wr);
675 }
676
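/*
 * Parse the data buffer descriptors of an SRP_CMD information unit and store
 * them in 'ioctx'. '*ind' is set to one if the indirect descriptor table
 * contains more entries than were transmitted together with the request.
 */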
677 static int srpt_get_desc_tbl(struct srpt_ioctx *ioctx, struct srp_cmd *srp_cmd,
678                              int *ind)
679 {
680         struct srp_indirect_buf *idb;
681         struct srp_direct_buf *db;
682
683         *ind = 0;
684         if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) ||
685             ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) {
686                 ioctx->n_rbuf = 1;
687                 ioctx->rbufs = &ioctx->single_rbuf;
688
689                 db = (void *)srp_cmd->add_data;
690                 memcpy(ioctx->rbufs, db, sizeof *db);
691                 ioctx->data_len = be32_to_cpu(db->len);
692         } else {
693                 idb = (void *)srp_cmd->add_data;
694
695                 ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof *db;
696
697                 if (ioctx->n_rbuf >
698                     (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
699                         *ind = 1;
700                         ioctx->n_rbuf = 0;
701                         goto out;
702                 }
703
704                 if (ioctx->n_rbuf == 1)
705                         ioctx->rbufs = &ioctx->single_rbuf;
706                 else
707                         ioctx->rbufs =
708                                 kmalloc(ioctx->n_rbuf * sizeof *db, GFP_ATOMIC);
709                 if (!ioctx->rbufs) {
710                         ioctx->n_rbuf = 0;
711                         return -ENOMEM;
712                 }
713
714                 db = idb->desc_list;
715                 memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof *db);
716                 ioctx->data_len = be32_to_cpu(idb->len);
717         }
718 out:
719         return 0;
720 }
721
722 /*
723  * Modify the attributes of queue pair 'qp': allow local write, remote read,
724  * and remote write. Also transition 'qp' to state IB_QPS_INIT.
725  */
726 static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
727 {
728         struct ib_qp_attr *attr;
729         int ret;
730
731         attr = kzalloc(sizeof *attr, GFP_KERNEL);
732         if (!attr)
733                 return -ENOMEM;
734
735         attr->qp_state = IB_QPS_INIT;
736         attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
737             IB_ACCESS_REMOTE_WRITE;
738         attr->port_num = ch->sport->port;
739         attr->pkey_index = 0;
740
741         ret = ib_modify_qp(qp, attr,
742                            IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PORT |
743                            IB_QP_PKEY_INDEX);
744
745         kfree(attr);
746         return ret;
747 }
748
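/*
 * Transition the queue pair 'qp' of channel 'ch' to the RTR or RTS state,
 * using the queue pair attributes provided by the IB CM.
 */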
749 static int srpt_ch_qp_rtr_rts(struct srpt_rdma_ch *ch, struct ib_qp *qp,
750                               enum ib_qp_state qp_state)
751 {
752         struct ib_qp_attr *qp_attr;
753         int attr_mask;
754         int ret;
755
756         qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
757         if (!qp_attr)
758                 return -ENOMEM;
759
760         qp_attr->qp_state = qp_state;
761         ret = ib_cm_init_qp_attr(ch->cm_id, qp_attr, &attr_mask);
762         if (ret)
763                 goto out;
764
765         if (qp_state == IB_QPS_RTR)
766                 qp_attr->max_dest_rd_atomic = 4;
767         else
768                 qp_attr->max_rd_atomic = 4;
769
770         ret = ib_modify_qp(qp, qp_attr, attr_mask);
771
772 out:
773         kfree(qp_attr);
774         return ret;
775 }
776
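/*
 * Release the resources associated with 'ioctx' and post it back on the SRQ
 * so that it can be used for receiving a new information unit.
 */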
777 static void srpt_reset_ioctx(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
778 {
779         int i;
780
781         if (ioctx->n_rdma_ius > 0 && ioctx->rdma_ius) {
782                 struct rdma_iu *riu = ioctx->rdma_ius;
783
784                 for (i = 0; i < ioctx->n_rdma_ius; ++i, ++riu)
785                         kfree(riu->sge);
786                 kfree(ioctx->rdma_ius);
787         }
788
789         if (ioctx->n_rbuf > 1)
790                 kfree(ioctx->rbufs);
791
792         if (srpt_post_recv(ch->sport->sdev, ioctx))
793                 /* we should queue it back to the free_ioctx queue */
794                 printk(KERN_ERR PFX "SRQ post_recv failed - this is serious\n");
795         else
796                 atomic_inc(&ch->req_lim_delta);
797 }
798
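/*
 * Process a work completion that finished with an error status: finish the
 * associated SCST command, if any, or otherwise recycle the I/O context.
 */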
799 static void srpt_handle_err_comp(struct srpt_rdma_ch *ch, struct ib_wc *wc)
800 {
801         struct srpt_ioctx *ioctx;
802         struct srpt_device *sdev = ch->sport->sdev;
803         scst_data_direction dir = SCST_DATA_NONE;
804
805         if (wc->wr_id & SRPT_OP_RECV) {
806                 ioctx = sdev->ioctx_ring[wc->wr_id & ~SRPT_OP_RECV];
807                 printk(KERN_ERR PFX "This is serious - SRQ is in bad state\n");
808         } else {
809                 ioctx = sdev->ioctx_ring[wc->wr_id];
810
811                 if (ioctx->scmnd) {
812                         struct scst_cmd *scmnd = ioctx->scmnd;
813
814                         dir = scst_cmd_get_data_direction(scmnd);
815
816                         if (dir == SCST_DATA_NONE)
817                                 scst_tgt_cmd_done(scmnd,
818                                         scst_estimate_context());
819                         else {
820                                 dma_unmap_sg(sdev->device->dma_device,
821                                              scst_cmd_get_sg(scmnd),
822                                              scst_cmd_get_sg_cnt(scmnd),
823                                              scst_to_tgt_dma_dir(dir));
824
825                                 if (scmnd->state == SCST_CMD_STATE_DATA_WAIT)
826                                         scst_rx_data(scmnd,
827                                                      SCST_RX_STATUS_ERROR,
828                                                      SCST_CONTEXT_THREAD);
829                                 else if (scmnd->state == SCST_CMD_STATE_XMIT_WAIT)
830                                         scst_tgt_cmd_done(scmnd,
831                                                 scst_estimate_context());
832                         }
833                 } else
834                         srpt_reset_ioctx(ch, ioctx);
835         }
836 }
837
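/*
 * Process the completion of a send work request: finish the associated SCST
 * command, if any, or otherwise recycle the I/O context.
 */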
838 static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
839                                   struct srpt_ioctx *ioctx,
840                                   enum scst_exec_context context)
841 {
842         if (ioctx->scmnd) {
843                 scst_data_direction dir =
844                         scst_cmd_get_data_direction(ioctx->scmnd);
845
846                 if (dir != SCST_DATA_NONE)
847                         dma_unmap_sg(ch->sport->sdev->device->dma_device,
848                                      scst_cmd_get_sg(ioctx->scmnd),
849                                      scst_cmd_get_sg_cnt(ioctx->scmnd),
850                                      scst_to_tgt_dma_dir(dir));
851
852                 scst_tgt_cmd_done(ioctx->scmnd, context);
853         } else
854                 srpt_reset_ioctx(ch, ioctx);
855 }
856
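/*
 * Process the completion of an RDMA work request: for a write command, tell
 * SCST that the data transfer from the initiator has finished.
 */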
857 static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
858                                   struct srpt_ioctx *ioctx)
859 {
860         if (!ioctx->scmnd) {
861                 srpt_reset_ioctx(ch, ioctx);
862                 return;
863         }
864
865         if (scst_cmd_get_data_direction(ioctx->scmnd) == SCST_DATA_WRITE)
866                 scst_rx_data(ioctx->scmnd, SCST_RX_STATUS_SUCCESS,
867                         scst_estimate_context());
868 }
869
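/*
 * Build an SRP_RSP response in the buffer of 'ioctx'. Sense data is included
 * when 's_key' indicates an error condition.
 */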
870 static void srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
871                                struct srpt_ioctx *ioctx, u8 s_key, u8 s_code,
872                                u64 tag)
873 {
874         struct srp_rsp *srp_rsp;
875         struct sense_data *sense;
876         int limit_delta;
877
878         srp_rsp = ioctx->buf;
879         memset(srp_rsp, 0, sizeof *srp_rsp);
880
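        /*
         * Report the request limit delta accumulated since the last response
         * and reset the counter. Note: reading and clearing the counter is
         * not one atomic operation.
         */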
881         limit_delta = atomic_read(&ch->req_lim_delta);
882         atomic_sub(limit_delta, &ch->req_lim_delta);
883
884         srp_rsp->opcode = SRP_RSP;
885         srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
886         srp_rsp->tag = tag;
887
888         if (s_key != NO_SENSE) {
889                 srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
890                 srp_rsp->status = SAM_STAT_CHECK_CONDITION;
891                 srp_rsp->sense_data_len =
892                     cpu_to_be32(sizeof *sense + (sizeof *sense % 4));
893
894                 sense = (struct sense_data *)(srp_rsp + 1);
895                 sense->err_code = 0x70;
896                 sense->key = s_key;
897                 sense->asc_ascq = s_code;
898         }
899 }
900
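/*
 * Build an SRP_RSP response for a task management request in the buffer of
 * 'ioctx'. Response data is only included when the request did not succeed.
 */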
901 static void srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
902                                    struct srpt_ioctx *ioctx, u8 rsp_code,
903                                    u64 tag)
904 {
905         struct srp_rsp *srp_rsp;
906         int limit_delta;
907
908         dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
909                                 MAX_MESSAGE_SIZE, DMA_TO_DEVICE);
910
911         srp_rsp = ioctx->buf;
912         memset(srp_rsp, 0, sizeof *srp_rsp);
913
914         limit_delta = atomic_read(&ch->req_lim_delta);
915         atomic_sub(limit_delta, &ch->req_lim_delta);
916
917         srp_rsp->opcode = SRP_RSP;
918         srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
919         srp_rsp->tag = tag;
920
921         if (rsp_code != SRP_TSK_MGMT_SUCCESS) {
922                 srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID;
923                 srp_rsp->resp_data_len = cpu_to_be32(4);
924                 srp_rsp->data[3] = rsp_code;
925         }
926 }
927
928 /*
929  * Process SRP_CMD.
930  */
931 static int srpt_handle_cmd(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
932 {
933         struct scst_cmd *scmnd = NULL;
934         struct srp_cmd *srp_cmd = NULL;
935         scst_data_direction dir = SCST_DATA_NONE;
936         int indirect_desc = 0;
937         int ret;
938         unsigned long flags;
939
940         srp_cmd = ioctx->buf;
941
942         if (srp_cmd->buf_fmt) {
943                 ret = srpt_get_desc_tbl(ioctx, srp_cmd, &indirect_desc);
944                 if (ret) {
945                         srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
946                                            NO_ADD_SENSE, srp_cmd->tag);
947                         ((struct srp_rsp *)ioctx->buf)->status =
948                                         SAM_STAT_TASK_SET_FULL;
949                         goto send_rsp;
950                 }
951
952                 if (indirect_desc) {
953                         srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
954                                            NO_ADD_SENSE, srp_cmd->tag);
955                         ((struct srp_rsp *)ioctx->buf)->status =
956                                         SAM_STAT_TASK_SET_FULL;
957                         goto send_rsp;
958                 }
959
960                 if (srp_cmd->buf_fmt & 0xf)
961                         dir = SCST_DATA_READ;
962                 else if (srp_cmd->buf_fmt >> 4)
963                         dir = SCST_DATA_WRITE;
964                 else
965                         dir = SCST_DATA_NONE;
966         } else
967                 dir = SCST_DATA_NONE;
968
969         scmnd = scst_rx_cmd(ch->scst_sess, (u8 *) &srp_cmd->lun,
970                             sizeof srp_cmd->lun, srp_cmd->cdb, 16,
971                             thread ? SCST_NON_ATOMIC : SCST_ATOMIC);
972         if (!scmnd) {
973                 srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
974                                    NO_ADD_SENSE, srp_cmd->tag);
975                 ((struct srp_rsp *)ioctx->buf)->status =
976                         SAM_STAT_TASK_SET_FULL;
977                 goto send_rsp;
978         }
979
980         ioctx->scmnd = scmnd;
981
982         switch (srp_cmd->task_attr) {
983         case SRP_CMD_HEAD_OF_Q:
984                 scmnd->queue_type = SCST_CMD_QUEUE_HEAD_OF_QUEUE;
985                 break;
986         case SRP_CMD_ORDERED_Q:
987                 scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
988                 break;
989         case SRP_CMD_SIMPLE_Q:
990                 scmnd->queue_type = SCST_CMD_QUEUE_SIMPLE;
991                 break;
992         case SRP_CMD_ACA:
993                 scmnd->queue_type = SCST_CMD_QUEUE_ACA;
994                 break;
995         default:
996                 scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
997                 break;
998         }
999
1000         scst_cmd_set_tag(scmnd, srp_cmd->tag);
1001         scst_cmd_set_tgt_priv(scmnd, ioctx);
1002         scst_cmd_set_expected(scmnd, dir, ioctx->data_len);
1003
1004         spin_lock_irqsave(&ch->spinlock, flags);
1005         list_add_tail(&ioctx->scmnd_list, &ch->active_scmnd_list);
1006         ch->active_scmnd_cnt++;
1007         spin_unlock_irqrestore(&ch->spinlock, flags);
1008
1009         scst_cmd_init_done(scmnd, scst_estimate_context());
1010
1011         return 0;
1012
1013 send_rsp:
1014         return -1;
1015 }
1016
1017 /*
1018  * Process SRP_TSK_MGMT. See also table 19 in the T10 SRP r16a document.
1019  */
1020 static int srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
1021                                 struct srpt_ioctx *ioctx)
1022 {
1023         struct srp_tsk_mgmt *srp_tsk = NULL;
1024         struct srpt_mgmt_ioctx *mgmt_ioctx;
1025         int ret;
1026
1027         srp_tsk = ioctx->buf;
1028
1029         printk(KERN_WARNING PFX
1030                "recv_tsk_mgmt= %d for task_tag= %lld"
1031                " using tag= %lld cm_id= %p sess= %p\n",
1032                srp_tsk->tsk_mgmt_func,
1033                (unsigned long long) srp_tsk->task_tag,
1034                (unsigned long long) srp_tsk->tag,
1035                ch->cm_id, ch->scst_sess);
1036
1037         mgmt_ioctx = kmalloc(sizeof *mgmt_ioctx, GFP_ATOMIC);
1038         if (!mgmt_ioctx) {
1039                 srpt_build_tskmgmt_rsp(ch, ioctx, SRP_TSK_MGMT_FAILED,
1040                                        srp_tsk->tag);
1041                 goto send_rsp;
1042         }
1043
1044         mgmt_ioctx->ioctx = ioctx;
1045         mgmt_ioctx->ch = ch;
1046         mgmt_ioctx->tag = srp_tsk->tag;
1047
1048         switch (srp_tsk->tsk_mgmt_func) {
1049         case SRP_TSK_ABORT_TASK:
1050                 ret = scst_rx_mgmt_fn_tag(ch->scst_sess,
1051                                           SCST_ABORT_TASK,
1052                                           srp_tsk->task_tag,
1053                                           thread ?
1054                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1055                                           mgmt_ioctx);
1056                 break;
1057         case SRP_TSK_ABORT_TASK_SET:
1058                 ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
1059                                           SCST_ABORT_TASK_SET,
1060                                           (u8 *) &srp_tsk->lun,
1061                                           sizeof srp_tsk->lun,
1062                                           thread ?
1063                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1064                                           mgmt_ioctx);
1065                 break;
1066         case SRP_TSK_CLEAR_TASK_SET:
1067                 ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
1068                                           SCST_CLEAR_TASK_SET,
1069                                           (u8 *) &srp_tsk->lun,
1070                                           sizeof srp_tsk->lun,
1071                                           thread ?
1072                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1073                                           mgmt_ioctx);
1074                 break;
1075 #if 0
1076         case SRP_TSK_LUN_RESET:
1077                 ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
1078                                           SCST_LUN_RESET,
1079                                           (u8 *) &srp_tsk->lun,
1080                                           sizeof srp_tsk->lun,
1081                                           thread ?
1082                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1083                                           mgmt_ioctx);
1084                 break;
1085 #endif
1086         case SRP_TSK_CLEAR_ACA:
1087                 ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
1088                                           SCST_CLEAR_ACA,
1089                                           (u8 *) &srp_tsk->lun,
1090                                           sizeof srp_tsk->lun,
1091                                           thread ?
1092                                           SCST_NON_ATOMIC : SCST_ATOMIC,
1093                                           mgmt_ioctx);
1094                 break;
1095         default:
1096                 srpt_build_tskmgmt_rsp(ch, ioctx,
1097                                        SRP_TSK_MGMT_FUNC_NOT_SUPP,
1098                                        srp_tsk->tag);
1099                 goto send_rsp;
1100         }
1101         return 0;
1102
1103 send_rsp:
1104         return -1;
1105 }
1106
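/*
 * Process a newly received information unit: dispatch SRP_CMD and
 * SRP_TSK_MGMT requests and reject any other information unit type. While the
 * channel is still connecting, received units are queued on cmd_wait_list.
 */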
1107 static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
1108                                struct srpt_ioctx *ioctx)
1109 {
1110         u8 op;
1111         unsigned long flags;
1112
1113         if (ch->state != RDMA_CHANNEL_LIVE) {
1114                 if (ch->state == RDMA_CHANNEL_CONNECTING) {
1115                         spin_lock_irqsave(&ch->spinlock, flags);
1116                         list_add_tail(&ioctx->wait_list, &ch->cmd_wait_list);
1117                         spin_unlock_irqrestore(&ch->spinlock, flags);
1118                 } else
1119                         srpt_reset_ioctx(ch, ioctx);
1120
1121                 return;
1122         }
1123
1124         dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
1125                                 MAX_MESSAGE_SIZE, DMA_FROM_DEVICE);
1126
1127         ioctx->data_len = 0;
1128         ioctx->n_rbuf = 0;
1129         ioctx->rbufs = NULL;
1130         ioctx->n_rdma = 0;
1131         ioctx->n_rdma_ius = 0;
1132         ioctx->rdma_ius = NULL;
1133         ioctx->scmnd = NULL;
1134
1135         op = *(u8 *) ioctx->buf;
1136         switch (op) {
1137         case SRP_CMD:
1138                 if (srpt_handle_cmd(ch, ioctx) < 0)
1139                         goto send_rsp;
1140                 break;
1141
1142         case SRP_TSK_MGMT:
1143                 if (srpt_handle_tsk_mgmt(ch, ioctx) < 0)
1144                         goto send_rsp;
1145                 break;
1146
1147         case SRP_I_LOGOUT:
1148         case SRP_AER_REQ:
1149         default:
1150                 srpt_build_cmd_rsp(ch, ioctx, ILLEGAL_REQUEST, INVALID_CDB,
1151                                    ((struct srp_cmd *)ioctx->buf)->tag);
1152
1153                 goto send_rsp;
1154         }
1155
1156         dma_sync_single_for_device(ch->sport->sdev->device->dma_device,
1157                                    ioctx->dma, MAX_MESSAGE_SIZE,
1158                                    DMA_FROM_DEVICE);
1159
1160         return;
1161
1162 send_rsp:
1163         if (ch->state != RDMA_CHANNEL_LIVE ||
1164             srpt_post_send(ch, ioctx,
1165                            sizeof(struct srp_rsp) +
1166                            be32_to_cpu(((struct srp_rsp *)ioctx->buf)->
1167                                        sense_data_len)))
1168                 srpt_reset_ioctx(ch, ioctx);
1169 }
1170
1171 /*
1172  * Returns true if the ioctx list is non-empty or if the ib_srpt kernel thread
1173  * should stop.
1174  * @pre thread != 0
1175  */
1176 static inline int srpt_test_ioctx_list(void)
1177 {
1178         int res = (!list_empty(&srpt_thread.thread_ioctx_list) ||
1179                    unlikely(kthread_should_stop()));
1180         return res;
1181 }
1182
1183 /*
1184  * Add 'ioctx' to the tail of the ioctx list and wake up the kernel thread.
1185  *
1186  * @pre thread != 0
1187  */
1188 static inline void srpt_schedule_thread(struct srpt_ioctx *ioctx)
1189 {
1190         unsigned long flags;
1191
1192         spin_lock_irqsave(&srpt_thread.thread_lock, flags);
1193         list_add_tail(&ioctx->comp_list, &srpt_thread.thread_ioctx_list);
1194         spin_unlock_irqrestore(&srpt_thread.thread_lock, flags);
1195         wake_up(&ioctx_list_waitQ);
1196 }
1197
1198 /*
1199  * InfiniBand completion queue (CQ) callback: invoked when a work completion
1200  * has been queued on the completion queue of an RDMA channel.
1201  */
1202 static void srpt_completion(struct ib_cq *cq, void *ctx)
1203 {
1204         struct srpt_rdma_ch *ch = ctx;
1205         struct srpt_device *sdev = ch->sport->sdev;
1206         struct ib_wc wc;
1207         struct srpt_ioctx *ioctx;
1208
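        /*
         * Re-arm the completion queue before draining it so that no
         * completion notification gets lost. When the 'thread' module
         * parameter is set, completions are handed off to the SRPT kernel
         * thread instead of being processed here in interrupt context.
         */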
1209         ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
1210         while (ib_poll_cq(ch->cq, 1, &wc) > 0) {
1211                 if (wc.status) {
1212                         printk(KERN_ERR PFX "failed %s status= %d\n",
1213                                wc.wr_id & SRPT_OP_RECV ? "receive" : "send",
1214                                wc.status);
1215                         srpt_handle_err_comp(ch, &wc);
1216                         break;
1217                 }
1218
1219                 if (wc.wr_id & SRPT_OP_RECV) {
1220                         ioctx = sdev->ioctx_ring[wc.wr_id & ~SRPT_OP_RECV];
1221                         if (thread) {
1222                                 ioctx->ch = ch;
1223                                 ioctx->op = IB_WC_RECV;
1224                                 srpt_schedule_thread(ioctx);
1225                         } else
1226                                 srpt_handle_new_iu(ch, ioctx);
1227                         continue;
1228                 } else
1229                         ioctx = sdev->ioctx_ring[wc.wr_id];
1230
1231                 if (thread) {
1232                         ioctx->ch = ch;
1233                         ioctx->op = wc.opcode;
1234                         srpt_schedule_thread(ioctx);
1235                 } else {
1236                         switch (wc.opcode) {
1237                         case IB_WC_SEND:
1238                                 srpt_handle_send_comp(ch, ioctx,
1239                                         scst_estimate_context());
1240                                 break;
1241                         case IB_WC_RDMA_WRITE:
1242                         case IB_WC_RDMA_READ:
1243                                 srpt_handle_rdma_comp(ch, ioctx);
1244                                 break;
1245                         default:
1246                                 break;
1247                         }
1248                 }
1249         }
1250 }
1251
1252 /*
1253  * Create a completion queue and a queue pair for RDMA channel 'ch'.
1254  */
1255 static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
1256 {
1257         struct ib_qp_init_attr *qp_init;
1258         struct srpt_device *sdev = ch->sport->sdev;
1259         int cqe;
1260         int ret;
1261
1262         qp_init = kzalloc(sizeof *qp_init, GFP_KERNEL);
1263         if (!qp_init)
1264                 return -ENOMEM;
1265
1266         /* Create a completion queue (CQ). */
1267
1268         cqe = SRPT_RQ_SIZE + SRPT_SQ_SIZE - 1;
1269 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(RHEL_RELEASE_CODE)
1270         ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe);
1271 #else
1272         ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe, 0);
1273 #endif
1274         if (IS_ERR(ch->cq)) {
1275                 ret = PTR_ERR(ch->cq);
1276                 printk(KERN_ERR PFX "failed to create_cq cqe= %d ret= %d\n",
1277                         cqe, ret);
1278                 goto out;
1279         }
1280
1281         /* Request completion notification. */
1282
1283         ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
1284
1285         /* Create a queue pair (QP). */
1286
1287         qp_init->qp_context = (void *)ch;
1288         qp_init->event_handler = srpt_qp_event;
1289         qp_init->send_cq = ch->cq;
1290         qp_init->recv_cq = ch->cq;
1291         qp_init->srq = sdev->srq;
1292         qp_init->sq_sig_type = IB_SIGNAL_REQ_WR;
1293         qp_init->qp_type = IB_QPT_RC;
1294         qp_init->cap.max_send_wr = SRPT_SQ_SIZE;
1295         qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE;
1296
1297         ch->qp = ib_create_qp(sdev->pd, qp_init);
1298         if (IS_ERR(ch->qp)) {
1299                 ret = PTR_ERR(ch->qp);
1300                 ib_destroy_cq(ch->cq);
1301                 printk(KERN_ERR PFX "failed to create_qp ret= %d\n", ret);
1302                 goto out;
1303         }
1304
1305         printk(KERN_DEBUG PFX "%s: max_cqe= %d max_sge= %d cm_id= %p\n",
1306                __func__, ch->cq->cqe, qp_init->cap.max_send_sge,
1307                ch->cm_id);
1308
1309         /* Modify the attributes and the state of queue pair ch->qp. */
1310
1311         ret = srpt_init_ch_qp(ch, ch->qp);
1312         if (ret) {
1313                 ib_destroy_qp(ch->qp);
1314                 ib_destroy_cq(ch->cq);
1315                 goto out;
1316         }
1317
1318         atomic_set(&ch->req_lim_delta, SRPT_RQ_SIZE);
1319 out:
1320         kfree(qp_init);
1321         return ret;
1322 }
1323
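/*
 * Look up the RDMA channel that has been associated with the specified IB CM
 * id; return NULL if no such channel exists.
 */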
1324 static struct srpt_rdma_ch *srpt_find_channel(struct ib_cm_id *cm_id)
1325 {
1326         struct srpt_device *sdev = cm_id->context;
1327         struct srpt_rdma_ch *ch, *tmp_ch;
1328
1329         spin_lock_irq(&sdev->spinlock);
1330         list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
1331                 if (ch->cm_id == cm_id) {
1332                         spin_unlock_irq(&sdev->spinlock);
1333                         return ch;
1334                 }
1335         }
1336
1337         spin_unlock_irq(&sdev->spinlock);
1338
1339         return NULL;
1340 }
1341
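/*
 * Release the resources associated with RDMA channel 'ch' and, if
 * 'destroy_cmid' is non-zero, also destroy its IB CM id.
 */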
1342 static int srpt_release_channel(struct srpt_rdma_ch *ch, int destroy_cmid)
1343 {
1344         spin_lock_irq(&ch->sport->sdev->spinlock);
1345         list_del(&ch->list);
1346         spin_unlock_irq(&ch->sport->sdev->spinlock);
1347
1348         if (ch->cm_id && destroy_cmid) {
1349                 printk(KERN_WARNING PFX
1350                        "%s: destroy cm_id= %p\n", __func__, ch->cm_id);
1351                 ib_destroy_cm_id(ch->cm_id);
1352                 ch->cm_id = NULL;
1353         }
1354
1355         ib_destroy_qp(ch->qp);
1356         ib_destroy_cq(ch->cq);
1357
1358         if (ch->scst_sess) {
1359                 struct srpt_ioctx *ioctx, *ioctx_tmp;
1360
1361                 printk(KERN_WARNING PFX
1362                        "%s: release sess= %p sess_name= %s active_cmd= %d\n",
1363                        __func__, ch->scst_sess, ch->sess_name,
1364                        ch->active_scmnd_cnt);
1365
1366                 list_for_each_entry_safe(ioctx, ioctx_tmp,
1367                                          &ch->active_scmnd_list, scmnd_list) {
1368                         list_del(&ioctx->scmnd_list);
1369                         ch->active_scmnd_cnt--;
1370                 }
1371
1372                 scst_unregister_session(ch->scst_sess, 0, NULL);
1373                 ch->scst_sess = NULL;
1374         }
1375
1376         kfree(ch);
1377
1378         return destroy_cmid ? 0 : 1;
1379 }
1380
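/*
 * Mark RDMA channel 'ch' as disconnecting and send either a DREQ ('dreq'
 * non-zero) or a DREP over its IB CM id.
 */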
1381 static int srpt_disconnect_channel(struct srpt_rdma_ch *ch, int dreq)
1382 {
1383         spin_lock_irq(&ch->spinlock);
1384         ch->state = RDMA_CHANNEL_DISCONNECTING;
1385         spin_unlock_irq(&ch->spinlock);
1386
1387         if (dreq)
1388                 ib_send_cm_dreq(ch->cm_id, NULL, 0);
1389         else
1390                 ib_send_cm_drep(ch->cm_id, NULL, 0);
1391
1392         return 0;
1393 }
1394
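/*
 * Process an IB CM REQ, i.e. an SRP login request: validate the request, set
 * up a new RDMA channel and either accept or reject the connection.
 */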
1395 static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
1396                             struct ib_cm_req_event_param *param,
1397                             void *private_data)
1398 {
1399         struct srpt_device *sdev = cm_id->context;
1400         struct srp_login_req *req;
1401         struct srp_login_rsp *rsp;
1402         struct srp_login_rej *rej;
1403         struct ib_cm_rep_param *rep_param;
1404         struct srpt_rdma_ch *ch, *tmp_ch;
1405         u32 it_iu_len;
1406         int ret = 0;
1407
1408         if (!sdev || !private_data)
1409                 return -EINVAL;
1410
1411         rsp = kzalloc(sizeof *rsp, GFP_KERNEL);
1412         rej = kzalloc(sizeof *rej, GFP_KERNEL);
1413         rep_param = kzalloc(sizeof *rep_param, GFP_KERNEL);
1414
1415         if (!rsp || !rej || !rep_param) {
1416                 ret = -ENOMEM;
1417                 goto out;
1418         }
1419
1420         req = (struct srp_login_req *)private_data;
1421
1422         it_iu_len = be32_to_cpu(req->req_it_iu_len);
1423
1424         printk(KERN_DEBUG PFX
1425                "Host login i_port_id=0x%llx:0x%llx t_port_id=0x%llx:0x%llx"
1426                " it_iu_len=%d\n",
1427                (unsigned long long)
1428                be64_to_cpu(*(u64 *)&req->initiator_port_id[0]),
1429                (unsigned long long)
1430                be64_to_cpu(*(u64 *)&req->initiator_port_id[8]),
1431                (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[0]),
1432                (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[8]),
1433                it_iu_len);
1434
1435         if (it_iu_len > MAX_MESSAGE_SIZE || it_iu_len < 64) {
1436                 rej->reason =
1437                     cpu_to_be32(SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
1438                 ret = -EINVAL;
1439                 printk(KERN_WARNING PFX
1440                        "Reject invalid it_iu_len=%d\n", it_iu_len);
1441                 goto reject;
1442         }
1443
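        /*
         * A "single channel" login asks the target to terminate any existing
         * channel between the same initiator and target ports before the new
         * channel is established.
         */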
1444         if ((req->req_flags & 0x3) == SRP_MULTICHAN_SINGLE) {
1445                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;
1446
1447                 spin_lock_irq(&sdev->spinlock);
1448
1449                 list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
1450                         if (!memcmp(ch->i_port_id, req->initiator_port_id, 16)
1451                             && !memcmp(ch->t_port_id, req->target_port_id, 16)
1452                             && param->port == ch->sport->port
1453                             && param->listen_id == ch->sport->sdev->cm_id
1454                             && ch->cm_id) {
1455                                 /* found an existing channel */
1456                                 printk(KERN_WARNING PFX
1457                                        "Found existing channel name= %s"
1458                                        " cm_id= %p state= %d\n",
1459                                        ch->sess_name, ch->cm_id, ch->state);
1460
1461                                 spin_unlock_irq(&sdev->spinlock);
1462
1463                                 rsp->rsp_flags =
1464                                     SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
1465
1466                                 if (ch->state == RDMA_CHANNEL_LIVE)
1467                                         srpt_disconnect_channel(ch, 1);
1468                                 else if (ch->state == RDMA_CHANNEL_CONNECTING) {
1469                                         ib_send_cm_rej(ch->cm_id,
1470                                                        IB_CM_REJ_NO_RESOURCES,
1471                                                        NULL, 0, NULL, 0);
1472                                         srpt_release_channel(ch, 1);
1473                                 }
1474
1475                                 spin_lock_irq(&sdev->spinlock);
1476                         }
1477                 }
1478
1479                 spin_unlock_irq(&sdev->spinlock);
1480
1481         } else
1482                 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
1483
1484         if (((u64) (*(u64 *) req->target_port_id) !=
1485              cpu_to_be64(mellanox_ioc_guid)) ||
1486             ((u64) (*(u64 *) (req->target_port_id + 8)) !=
1487              cpu_to_be64(mellanox_ioc_guid))) {
1488                 rej->reason =
1489                     cpu_to_be32(SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
1490                 ret = -EINVAL;
1491                 printk(KERN_WARNING PFX "Reject invalid target_port_id\n");
1492                 goto reject;
1493         }
1494
1495         ch = kzalloc(sizeof *ch, GFP_KERNEL);
1496         if (!ch) {
1497                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1498                 printk(KERN_WARNING PFX "Reject: failed to allocate rdma_ch\n");
1499                 ret = -ENOMEM;
1500                 goto reject;
1501         }
1502
1503         spin_lock_init(&ch->spinlock);
1504         memcpy(ch->i_port_id, req->initiator_port_id, 16);
1505         memcpy(ch->t_port_id, req->target_port_id, 16);
1506         ch->sport = &sdev->port[param->port - 1];
1507         ch->cm_id = cm_id;
1508         ch->state = RDMA_CHANNEL_CONNECTING;
1509         INIT_LIST_HEAD(&ch->cmd_wait_list);
1510         INIT_LIST_HEAD(&ch->active_scmnd_list);
1511
1512         ret = srpt_create_ch_ib(ch);
1513         if (ret) {
1514                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1515                 printk(KERN_WARNING PFX "Reject: failed to create rdma_ch\n");
1516                 goto free_ch;
1517         }
1518
1519         ret = srpt_ch_qp_rtr_rts(ch, ch->qp, IB_QPS_RTR);
1520         if (ret) {
1521                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1522                 printk(KERN_WARNING PFX
1523                        "Reject: failed to transition QP to RTR, ret=%d\n", ret);
1524                 goto destroy_ib;
1525         }
1526
1527         sprintf(ch->sess_name, "0x%016llx%016llx",
1528                 (unsigned long long)be64_to_cpu(*(u64 *)ch->i_port_id),
1529                 (unsigned long long)be64_to_cpu(*(u64 *)(ch->i_port_id + 8)));
1530
1531         BUG_ON(!sdev->scst_tgt);
1532         ch->scst_sess = scst_register_session(sdev->scst_tgt, 0, ch->sess_name,
1533                                   NULL, NULL);
1534         if (!ch->scst_sess) {
1535                 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1536                 printk(KERN_WARNING PFX "Failed to create scst sess\n");
1537                 goto destroy_ib;
1538         }
1539
1540         spin_lock_irq(&sdev->spinlock);
1541         list_add_tail(&ch->list, &sdev->rch_list);
1542         spin_unlock_irq(&sdev->spinlock);
1543
1544         printk(KERN_DEBUG PFX "Establish connection sess=%p name=%s cm_id=%p\n",
1545                ch->scst_sess, ch->sess_name, ch->cm_id);
1546
1547         scst_sess_set_tgt_priv(ch->scst_sess, ch);
1548
1549         /* create srp_login_response */
1550         rsp->opcode = SRP_LOGIN_RSP;
1551         rsp->tag = req->tag;
1552         rsp->max_it_iu_len = req->req_it_iu_len;
1553         rsp->max_ti_iu_len = req->req_it_iu_len;
1554         rsp->buf_fmt =
1555             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
1556         rsp->req_lim_delta = cpu_to_be32(SRPT_RQ_SIZE);
1557         atomic_set(&ch->req_lim_delta, 0);
1558
1559         /* create cm reply */
1560         rep_param->qp_num = ch->qp->qp_num;
1561         rep_param->private_data = (void *)rsp;
1562         rep_param->private_data_len = sizeof *rsp;
1563         rep_param->rnr_retry_count = 7;
1564         rep_param->flow_control = 1;
1565         rep_param->failover_accepted = 0;
1566         rep_param->srq = 1;
1567         rep_param->responder_resources = 4;
1568         rep_param->initiator_depth = 4;
1569
1570         ret = ib_send_cm_rep(cm_id, rep_param);
1571         if (ret)
1572                 srpt_release_channel(ch, 0);
1573
1574         goto out;
1575
1576 destroy_ib:
1577         ib_destroy_qp(ch->qp);
1578         ib_destroy_cq(ch->cq);
1579
1580 free_ch:
1581         kfree(ch);
1582
1583 reject:
1584         rej->opcode = SRP_LOGIN_REJ;
1585         rej->tag = req->tag;
1586         rej->buf_fmt =
1587             cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
1588
1589         ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
1590                              (void *)rej, sizeof *rej);
1591
1592 out:
1593         kfree(rep_param);
1594         kfree(rsp);
1595         kfree(rej);
1596
1597         return ret;
1598 }
1599
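/*
 * Look up the RDMA channel that belongs to 'cm_id' and, if one is found,
 * release it. Returns -EINVAL if no matching channel exists.
 */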
1600 static int srpt_find_and_release_channel(struct ib_cm_id *cm_id)
1601 {
1602         struct srpt_rdma_ch *ch;
1603
1604         ch = srpt_find_channel(cm_id);
1605         if (!ch)
1606                 return -EINVAL;
1607
1608         return srpt_release_channel(ch, 0);
1609 }
1610
1611 static int srpt_cm_rej_recv(struct ib_cm_id *cm_id)
1612 {
1613         printk(KERN_DEBUG PFX "%s: cm_id=%p\n", __func__, cm_id);
1614         return srpt_find_and_release_channel(cm_id);
1615 }
1616
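/*
 * Process an RTU (ready to use) CM event: move the channel to the
 * RDMA_CHANNEL_LIVE state, transition its queue pair to RTS and process any
 * information units that arrived while the channel was still connecting.
 */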
1617 static int srpt_cm_rtu_recv(struct ib_cm_id *cm_id)
1618 {
1619         struct srpt_rdma_ch *ch;
1620         int ret;
1621
1622         ch = srpt_find_channel(cm_id);
1623         if (!ch)
1624                 return -EINVAL;
1625
1626         if (ch->state == RDMA_CHANNEL_CONNECTING) {
1627                 struct srpt_ioctx *ioctx, *ioctx_tmp;
1628
1629                 spin_lock_irq(&ch->spinlock);
1630                 ch->state = RDMA_CHANNEL_LIVE;
1631                 spin_unlock_irq(&ch->spinlock);
1632                 ret = srpt_ch_qp_rtr_rts(ch, ch->qp, IB_QPS_RTS);
1633
1634                 list_for_each_entry_safe(ioctx, ioctx_tmp, &ch->cmd_wait_list,
1635                                          wait_list) {
1636                         list_del(&ioctx->wait_list);
1637                         srpt_handle_new_iu(ch, ioctx);
1638                 }
1639         } else if (ch->state == RDMA_CHANNEL_DISCONNECTING)
1640                 ret = -EAGAIN;
1641         else
1642                 ret = 0;
1643
1644         if (ret) {
1645                 printk(KERN_ERR PFX "cm_id=%p sess_name=%s state=%d\n",
1646                        cm_id, ch->sess_name, ch->state);
1647                 srpt_disconnect_channel(ch, 1);
1648         }
1649
1650         return ret;
1651 }
1652
1653 static int srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
1654 {
1655         printk(KERN_DEBUG PFX "%s: cm_id=%p\n", __func__, cm_id);
1656         return srpt_find_and_release_channel(cm_id);
1657 }
1658
1659 static int srpt_cm_rep_error(struct ib_cm_id *cm_id)
1660 {
1661         printk(KERN_DEBUG PFX "%s: cm_id=%p\n", __func__, cm_id);
1662         return srpt_find_and_release_channel(cm_id);
1663 }
1664
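/*
 * Process a DREQ (disconnection request) received from the initiator:
 * disconnect the channel unless a disconnect is already in progress.
 */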
1665 static int srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
1666 {
1667         struct srpt_rdma_ch *ch;
1668         int ret = 0;
1669
1670         ch = srpt_find_channel(cm_id);
1671
1672         if (!ch)
1673                 return -EINVAL;
1674
1675         printk(KERN_DEBUG PFX "%s: cm_id= %p ch->state= %d\n",
1676                  __func__, cm_id, ch->state);
1677
1678         switch (ch->state) {
1679         case RDMA_CHANNEL_LIVE:
1680         case RDMA_CHANNEL_CONNECTING:
1681                 ret = srpt_disconnect_channel(ch, 0);
1682                 break;
1683         case RDMA_CHANNEL_DISCONNECTING:
1684         default:
1685                 break;
1686         }
1687
1688         return ret;
1689 }
1690
1691 static int srpt_cm_drep_recv(struct ib_cm_id *cm_id)
1692 {
1693         printk(KERN_DEBUG PFX "%s: cm_id=%p\n", __func__, cm_id);
1694         return srpt_find_and_release_channel(cm_id);
1695 }
1696
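/*
 * InfiniBand connection manager callback. Dispatches each received CM event
 * to the corresponding srpt_cm_*() handler defined above.
 */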
1697 static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
1698 {
1699         int ret = 0;
1700
1701         switch (event->event) {
1702         case IB_CM_REQ_RECEIVED:
1703                 ret = srpt_cm_req_recv(cm_id, &event->param.req_rcvd,
1704                                        event->private_data);
1705                 break;
1706         case IB_CM_REJ_RECEIVED:
1707                 ret = srpt_cm_rej_recv(cm_id);
1708                 break;
1709         case IB_CM_RTU_RECEIVED:
1710         case IB_CM_USER_ESTABLISHED:
1711                 ret = srpt_cm_rtu_recv(cm_id);
1712                 break;
1713         case IB_CM_DREQ_RECEIVED:
1714                 ret = srpt_cm_dreq_recv(cm_id);
1715                 break;
1716         case IB_CM_DREP_RECEIVED:
1717                 ret = srpt_cm_drep_recv(cm_id);
1718                 break;
1719         case IB_CM_TIMEWAIT_EXIT:
1720                 ret = srpt_cm_timewait_exit(cm_id);
1721                 break;
1722         case IB_CM_REP_ERROR:
1723                 ret = srpt_cm_rep_error(cm_id);
1724                 break;
1725         default:
1726                 break;
1727         }
1728
1729         return ret;
1730 }
1731
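/*
 * DMA-map the scatterlist of 'scmnd' and build the rdma_iu list of 'ioctx':
 * for each SRP direct buffer descriptor received from the initiator, fill in
 * one or more rdma_iu entries (one per RDMA work request) together with the
 * local ib_sge entries that describe the mapped scatterlist segments.
 * Returns zero upon success and a negative value upon failure.
 */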
1732 static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1733                                  struct srpt_ioctx *ioctx,
1734                                  struct scst_cmd *scmnd)
1735 {
1736         struct scatterlist *scat;
1737         scst_data_direction dir;
1738         struct rdma_iu *riu;
1739         struct srp_direct_buf *db;
1740         dma_addr_t dma_addr;
1741         struct ib_sge *sge;
1742         u64 raddr;
1743         u32 rsize;
1744         u32 tsize;
1745         u32 dma_len;
1746         int count, nrdma;
1747         int i, j, k;
1748
1749         scat = scst_cmd_get_sg(scmnd);
1750         dir = scst_cmd_get_data_direction(scmnd);
1751         count = dma_map_sg(ch->sport->sdev->device->dma_device, scat,
1752                            scst_cmd_get_sg_cnt(scmnd),
1753                            scst_to_tgt_dma_dir(dir));
1754         if (unlikely(!count))
1755                 return -EBUSY;
1756
1757         if (ioctx->rdma_ius && ioctx->n_rdma_ius)
1758                 nrdma = ioctx->n_rdma_ius;
1759         else {
1760                 nrdma = count / SRPT_DEF_SG_PER_WQE + ioctx->n_rbuf;
1761
1762                 ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu,
1763                                           scst_cmd_atomic(scmnd)
1764                                           ? GFP_ATOMIC : GFP_KERNEL);
1765                 if (!ioctx->rdma_ius) {
1766                         dma_unmap_sg(ch->sport->sdev->device->dma_device,
1767                                      scat, scst_cmd_get_sg_cnt(scmnd),
1768                                      scst_to_tgt_dma_dir(dir));
1769                         return -ENOMEM;
1770                 }
1771
1772                 ioctx->n_rdma_ius = nrdma;
1773         }
1774
1775         db = ioctx->rbufs;
1776         tsize = (dir == SCST_DATA_READ) ?
1777                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
1778         dma_len = sg_dma_len(&scat[0]);
1779         riu = ioctx->rdma_ius;
1780
1781         /*
1782          * For each remote descriptor, calculate the number of ib_sge
1783          * entries it needs. If that number does not exceed
1784          * SRPT_DEF_SG_PER_WQE per RDMA operation, a single rdma_iu (one
1785          * RDMA work request) per remote descriptor suffices; otherwise
1786          * extra rdma_iu entries are allocated to carry the remaining
1787          * ib_sge entries in additional RDMA work requests.
1788          */
1789         for (i = 0, j = 0;
1790              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
1791                 rsize = be32_to_cpu(db->len);
1792                 raddr = be64_to_cpu(db->va);
1793                 riu->raddr = raddr;
1794                 riu->rkey = be32_to_cpu(db->key);
1795                 riu->sge_cnt = 0;
1796
1797                 /* calculate how many sge required for this remote_buf */
1798                 while (rsize > 0 && tsize > 0) {
1799
1800                         if (rsize >= dma_len) {
1801                                 tsize -= dma_len;
1802                                 rsize -= dma_len;
1803                                 raddr += dma_len;
1804
1805                                 if (tsize > 0) {
1806                                         ++j;
1807                                         if (j < count)
1808                                                 dma_len = sg_dma_len(&scat[j]);
1809                                 }
1810                         } else {
1811                                 tsize -= rsize;
1812                                 dma_len -= rsize;
1813                                 rsize = 0;
1814                         }
1815
1816                         ++riu->sge_cnt;
1817
1818                         if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) {
1819                                 riu->sge =
1820                                     kmalloc(riu->sge_cnt * sizeof *riu->sge,
1821                                             scst_cmd_atomic(scmnd)
1822                                             ? GFP_ATOMIC : GFP_KERNEL);
1823                                 if (!riu->sge)
1824                                         goto free_mem;
1825
1826                                 ++ioctx->n_rdma;
1827                                 ++riu;
1828                                 riu->sge_cnt = 0;
1829                                 riu->raddr = raddr;
1830                                 riu->rkey = be32_to_cpu(db->key);
1831                         }
1832                 }
1833
1834                 riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge,
1835                                    scst_cmd_atomic(scmnd)
1836                                    ? GFP_ATOMIC : GFP_KERNEL);
1837
1838                 if (!riu->sge)
1839                         goto free_mem;
1840
1841                 ++ioctx->n_rdma;
1842         }
1843
1844         db = ioctx->rbufs;
1845         scat = scst_cmd_get_sg(scmnd);
1846         tsize = (dir == SCST_DATA_READ) ?
1847                 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
1848         riu = ioctx->rdma_ius;
1849         dma_len = sg_dma_len(&scat[0]);
1850         dma_addr = sg_dma_address(&scat[0]);
1851
1852         /* The second loop maps the scatterlist DMA addresses to the rdma_iu->ib_sge entries. */
1853         for (i = 0, j = 0;
1854              j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
1855                 rsize = be32_to_cpu(db->len);
1856                 sge = riu->sge;
1857                 k = 0;
1858
1859                 while (rsize > 0 && tsize > 0) {
1860                         sge->addr = dma_addr;
1861                         sge->lkey = ch->sport->sdev->mr->lkey;
1862
1863                         if (rsize >= dma_len) {
1864                                 sge->length =
1865                                         (tsize < dma_len) ? tsize : dma_len;
1866                                 tsize -= dma_len;
1867                                 rsize -= dma_len;
1868
1869                                 if (tsize > 0) {
1870                                         ++j;
1871                                         if (j < count) {
1872                                                 dma_len = sg_dma_len(&scat[j]);
1873                                                 dma_addr =
1874                                                     sg_dma_address(&scat[j]);
1875                                         }
1876                                 }
1877                         } else {
1878                                 sge->length = (tsize < rsize) ? tsize : rsize;
1879                                 tsize -= rsize;
1880                                 dma_len -= rsize;
1881                                 dma_addr += rsize;
1882                                 rsize = 0;
1883                         }
1884
1885                         ++k;
1886                         if (k == riu->sge_cnt && rsize > 0) {
1887                                 ++riu;
1888                                 sge = riu->sge;
1889                                 k = 0;
1890                         } else if (rsize > 0)
1891                                 ++sge;
1892                 }
1893         }
1894
1895         return 0;
1896
1897 free_mem:
1898         while (ioctx->n_rdma)
1899                 kfree(ioctx->rdma_ius[--ioctx->n_rdma].sge);
1900
1901         kfree(ioctx->rdma_ius);
1902
1903         dma_unmap_sg(ch->sport->sdev->device->dma_device,
1904                      scat, scst_cmd_get_sg_cnt(scmnd),
1905                      scst_to_tgt_dma_dir(dir));
1906
1907         return -ENOMEM;
1908 }
1909
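/*
 * Post one RDMA work request per rdma_iu of 'ioctx' on the queue pair of
 * channel 'ch': RDMA writes for a read command and RDMA reads for a write
 * command. Only the last work request of a write command is posted with
 * IB_SEND_SIGNALED and hence generates a completion.
 */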
1910 static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
1911                               scst_data_direction dir)
1912 {
1913         struct ib_send_wr wr;
1914         struct ib_send_wr *bad_wr;
1915         struct rdma_iu *riu;
1916         int i;
1917         int ret = 0;
1918
1919         riu = ioctx->rdma_ius;
1920         memset(&wr, 0, sizeof wr);
1921
1922         for (i = 0; i < ioctx->n_rdma; ++i, ++riu) {
1923                 wr.opcode = (dir == SCST_DATA_READ) ?
1924                     IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
1925                 wr.next = NULL;
1926                 wr.wr_id = ioctx->index;
1927                 wr.wr.rdma.remote_addr = riu->raddr;
1928                 wr.wr.rdma.rkey = riu->rkey;
1929                 wr.num_sge = riu->sge_cnt;
1930                 wr.sg_list = riu->sge;
1931
1932                 /* request a completion only for the last rdma wr of a write command */
1933                 if (i == (ioctx->n_rdma - 1) && dir == SCST_DATA_WRITE)
1934                         wr.send_flags = IB_SEND_SIGNALED;
1935
1936                 ret = ib_post_send(ch->qp, &wr, &bad_wr);
1937                 if (ret)
1938                         break;
1939         }
1940
1941         return ret;
1942 }
1943
1944 /*
1945  * Start a data transfer to or from the initiator via RDMA. Must not block.
1946  */
1947 static int srpt_xfer_data(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
1948                           struct scst_cmd *scmnd)
1949 {
1950         int ret;
1951
1952         ret = srpt_map_sg_to_ib_sge(ch, ioctx, scmnd);
1953         if (ret) {
1954                 printk(KERN_ERR PFX "%s[%d] ret=%d\n", __func__, __LINE__, ret);
1955                 ret = SCST_TGT_RES_QUEUE_FULL;
1956                 goto out;
1957         }
1958
1959         ret = srpt_perform_rdmas(ch, ioctx, scst_cmd_get_data_direction(scmnd));
1960         if (ret) {
1961                 printk(KERN_ERR PFX "%s[%d] ret=%d\n", __func__, __LINE__, ret);
1962                 if (ret == -EAGAIN || ret == -ENOMEM)
1963                         ret = SCST_TGT_RES_QUEUE_FULL;
1964                 else
1965                         ret = SCST_TGT_RES_FATAL_ERROR;
1966                 goto out;
1967         }
1968
1969         ret = SCST_TGT_RES_SUCCESS;
1970
1971 out:
1972         return ret;
1973 }
1974
1975 /*
1976  * Called by the SCST core to inform ib_srpt that data reception should start.
1977  * Must not block.
1978  */
1979 static int srpt_rdy_to_xfer(struct scst_cmd *scmnd)
1980 {
1981         struct srpt_rdma_ch *ch;
1982         struct srpt_ioctx *ioctx;
1983
1984         ioctx = scst_cmd_get_tgt_priv(scmnd);
1985         BUG_ON(!ioctx);
1986
1987         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
1988         BUG_ON(!ch);
1989
1990         if (ch->state == RDMA_CHANNEL_DISCONNECTING)
1991                 return SCST_TGT_RES_FATAL_ERROR;
1992         else if (ch->state == RDMA_CHANNEL_CONNECTING)
1993                 return SCST_TGT_RES_QUEUE_FULL;
1994
1995         return srpt_xfer_data(ch, ioctx, scmnd);
1996 }
1997
1998 /*
1999  * Called by the SCST core. Transmits the response buffer and status held in
2000  * 'scmnd'. Must not block.
2001  */
2002 static int srpt_xmit_response(struct scst_cmd *scmnd)
2003 {
2004         struct srpt_rdma_ch *ch;
2005         struct srpt_ioctx *ioctx;
2006         struct srp_rsp *srp_rsp;
2007         u64 tag;
2008         int ret = SCST_TGT_RES_SUCCESS;
2009         int dir;
2010         int status;
2011
2012         ioctx = scst_cmd_get_tgt_priv(scmnd);
2013         BUG_ON(!ioctx);
2014
2015         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2016         BUG_ON(!ch);
2017
2018         tag = scst_cmd_get_tag(scmnd);
2019
2020         if (ch->state != RDMA_CHANNEL_LIVE) {
2021                 printk(KERN_ERR PFX
2022                        "%s: tag= %lld channel in bad state %d\n",
2023                        __func__, (unsigned long long)tag, ch->state);
2024
2025                 if (ch->state == RDMA_CHANNEL_DISCONNECTING)
2026                         ret = SCST_TGT_RES_FATAL_ERROR;
2027                 else if (ch->state == RDMA_CHANNEL_CONNECTING)
2028                         ret = SCST_TGT_RES_QUEUE_FULL;
2029
2030                 if (unlikely(scst_cmd_aborted(scmnd)))
2031                         goto out_aborted;
2032
2033                 goto out;
2034         }
2035
2036         dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
2037                                 MAX_MESSAGE_SIZE, DMA_TO_DEVICE);
2038
2039         srp_rsp = ioctx->buf;
2040
2041         if (unlikely(scst_cmd_aborted(scmnd))) {
2042                 printk(KERN_ERR PFX
2043                        "%s: tag= %lld has already been aborted\n",
2044                        __func__, (unsigned long long)tag);
2045                 goto out_aborted;
2046         }
2047
2048         dir = scst_cmd_get_data_direction(scmnd);
2049         status = scst_cmd_get_status(scmnd) & 0xff;
2050
2051         srpt_build_cmd_rsp(ch, ioctx, NO_SENSE, NO_ADD_SENSE, tag);
2052
2053         if (SCST_SENSE_VALID(scst_cmd_get_sense_buffer(scmnd))) {
2054                 srp_rsp->sense_data_len = scst_cmd_get_sense_buffer_len(scmnd);
2055                 if (srp_rsp->sense_data_len >
2056                     (MAX_MESSAGE_SIZE - sizeof *srp_rsp))
2057                         srp_rsp->sense_data_len =
2058                             MAX_MESSAGE_SIZE - sizeof *srp_rsp;
2059
2060                 memcpy((u8 *) (srp_rsp + 1), scst_cmd_get_sense_buffer(scmnd),
2061                        srp_rsp->sense_data_len);
2062
2063                 srp_rsp->sense_data_len = cpu_to_be32(srp_rsp->sense_data_len);
2064                 srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
2065
2066                 if (!status)
2067                         status = SAM_STAT_CHECK_CONDITION;
2068         }
2069
2070         srp_rsp->status = status;
2071
2072         /* transfer read data if any */
2073         if (dir == SCST_DATA_READ && scst_cmd_get_resp_data_len(scmnd)) {
2074                 ret = srpt_xfer_data(ch, ioctx, scmnd);
2075                 if (ret != SCST_TGT_RES_SUCCESS) {
2076                         printk(KERN_ERR PFX
2077                                "%s: tag= %lld xfer_data failed\n",
2078                                __func__, (unsigned long long)tag);
2079                         goto out;
2080                 }
2081         }
2082
2083         if (srpt_post_send(ch, ioctx,
2084                            sizeof *srp_rsp +
2085                            be32_to_cpu(srp_rsp->sense_data_len))) {
2086                 printk(KERN_ERR PFX "%s: ch->state= %d tag= %lld\n",
2087                        __func__, ch->state,
2088                        (unsigned long long)tag);
2089                 ret = SCST_TGT_RES_FATAL_ERROR;
2090         }
2091
2092 out:
2093         return ret;
2094
2095 out_aborted:
2096         ret = SCST_TGT_RES_SUCCESS;
2097         scst_set_delivery_status(scmnd, SCST_CMD_DELIVERY_ABORTED);
2098         scst_tgt_cmd_done(scmnd, SCST_CONTEXT_SAME);
2099         goto out;
2100 }
2101
2102 /*
2103  * Called by the SCST core to inform ib_srpt that a received task management
2104  * function has been completed. Must not block.
2105  */
2106 static void srpt_tsk_mgmt_done(struct scst_mgmt_cmd *mcmnd)
2107 {
2108         struct srpt_rdma_ch *ch;
2109         struct srpt_mgmt_ioctx *mgmt_ioctx;
2110         struct srpt_ioctx *ioctx;
2111
2112         mgmt_ioctx = scst_mgmt_cmd_get_tgt_priv(mcmnd);
2113         BUG_ON(!mgmt_ioctx);
2114
2115         ch = mgmt_ioctx->ch;
2116         BUG_ON(!ch);
2117
2118         ioctx = mgmt_ioctx->ioctx;
2119         BUG_ON(!ioctx);
2120
2121         printk(KERN_WARNING PFX
2122                "%s: tsk_mgmt_done for tag= %lld status=%d\n",
2123                __func__, (unsigned long long)mgmt_ioctx->tag,
2124                scst_mgmt_cmd_get_status(mcmnd));
2125
2126         srpt_build_tskmgmt_rsp(ch, ioctx,
2127                                (scst_mgmt_cmd_get_status(mcmnd) ==
2128                                 SCST_MGMT_STATUS_SUCCESS) ?
2129                                SRP_TSK_MGMT_SUCCESS : SRP_TSK_MGMT_FAILED,
2130                                mgmt_ioctx->tag);
2131         srpt_post_send(ch, ioctx, sizeof(struct srp_rsp) + 4);
2132
2133         scst_mgmt_cmd_set_tgt_priv(mcmnd, NULL);
2134
2135         kfree(mgmt_ioctx);
2136 }
2137
2138 /*
2139  * Called by the SCST core to inform ib_srpt that the command 'scmnd' is about
2140  * to be freed. May be called in IRQ context.
2141  */
2142 static void srpt_on_free_cmd(struct scst_cmd *scmnd)
2143 {
2144         struct srpt_rdma_ch *ch;
2145         struct srpt_ioctx *ioctx;
2146
2147         ioctx = scst_cmd_get_tgt_priv(scmnd);
2148         BUG_ON(!ioctx);
2149
2150         ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2151         BUG_ON(!ch);
2152
2153         spin_lock_irq(&ch->spinlock);
2154         list_del(&ioctx->scmnd_list);
2155         ch->active_scmnd_cnt--;
2156         spin_unlock_irq(&ch->spinlock);
2157
2158         srpt_reset_ioctx(ch, ioctx);
2159         scst_cmd_set_tgt_priv(scmnd, NULL);
2160 }
2161
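/*
 * Work queue callback that refreshes the information of a single port by
 * calling srpt_refresh_port(). The two prototypes below accommodate the
 * work queue API change that went into kernel 2.6.20.
 */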
2162 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2163 /* A vanilla 2.6.19 or older kernel without backported OFED kernel headers. */
2164 static void srpt_refresh_port_work(void *ctx)
2165 #else
2166 static void srpt_refresh_port_work(struct work_struct *work)
2167 #endif
2168 {
2169 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2170         struct srpt_port *sport = (struct srpt_port *)ctx;
2171 #else
2172         struct srpt_port *sport = container_of(work, struct srpt_port, work);
2173 #endif
2174
2175         srpt_refresh_port(sport);
2176 }
2177
2178 /*
2179  * Called by the SCST core to detect target adapters. Returns the number of
2180  * detected target adapters.
2181  */
2182 static int srpt_detect(struct scst_tgt_template *tp)
2183 {
2184         struct srpt_device *sdev;
2185         int count = 0;
2186
2187         TRACE_ENTRY();
2188
2189         list_for_each_entry(sdev, &srpt_devices, list)
2190                 ++count;
2191
2192         TRACE_EXIT();
2193
2194         return count;
2195 }
2196
2197 /*
2198  * Callback function called by the SCST core from scst_unregister() to free up
2199  * the resources associated with device scst_tgt.
2200  */
2201 static int srpt_release(struct scst_tgt *scst_tgt)
2202 {
2203         struct srpt_device *sdev = scst_tgt_get_tgt_priv(scst_tgt);
2204         struct srpt_rdma_ch *ch, *tmp_ch;
2205
2206         TRACE_ENTRY();
2207
2208         BUG_ON(!scst_tgt);
2209 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
2210         WARN_ON(!sdev);
2211         if (!sdev)
2212                 return -ENODEV;
2213 #else
2214         if (WARN_ON(!sdev))
2215                 return -ENODEV;
2216 #endif
2217
2218         list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list)
2219             srpt_release_channel(ch, 1);
2220
2221         srpt_unregister_mad_agent(sdev);
2222
2223         scst_tgt_set_tgt_priv(scst_tgt, NULL);
2224
2225         TRACE_EXIT();
2226
2227         return 0;
2228 }
2229
2230 /*
2231  * Entry point for ib_srpt's kernel thread. This kernel thread is only created
2232  * when the module parameter 'thread' is not zero (the default is zero).
2233  * This thread processes the ioctx list srpt_thread.thread_ioctx_list.
2234  *
2235  * @pre thread != 0
2236  */
2237 static int srpt_ioctx_thread(void *arg)
2238 {
2239         struct srpt_ioctx *ioctx;
2240
2241         /* Hibernation / freezing of the SRPT kernel thread is not supported. */
2242         current->flags |= PF_NOFREEZE;
2243
2244         spin_lock_irq(&srpt_thread.thread_lock);
2245         while (!kthread_should_stop()) {
2246                 wait_queue_t wait;
2247                 init_waitqueue_entry(&wait, current);
2248
2249                 if (!srpt_test_ioctx_list()) {
2250                         add_wait_queue_exclusive(&ioctx_list_waitQ, &wait);
2251
2252                         for (;;) {
2253                                 set_current_state(TASK_INTERRUPTIBLE);
2254                                 if (srpt_test_ioctx_list())
2255                                         break;
2256                                 spin_unlock_irq(&srpt_thread.thread_lock);
2257                                 schedule();
2258                                 spin_lock_irq(&srpt_thread.thread_lock);
2259                         }
2260                         set_current_state(TASK_RUNNING);
2261                         remove_wait_queue(&ioctx_list_waitQ, &wait);
2262                 }
2263
2264                 while (!list_empty(&srpt_thread.thread_ioctx_list)) {
2265                         ioctx = list_entry(srpt_thread.thread_ioctx_list.next,
2266                                            struct srpt_ioctx, comp_list);
2267
2268                         list_del(&ioctx->comp_list);
2269
2270                         spin_unlock_irq(&srpt_thread.thread_lock);
2271                         switch (ioctx->op) {
2272                         case IB_WC_SEND:
2273                                 srpt_handle_send_comp(ioctx->ch, ioctx,
2274                                         SCST_CONTEXT_DIRECT);
2275                                 break;
2276                         case IB_WC_RDMA_WRITE:
2277                         case IB_WC_RDMA_READ:
2278                                 srpt_handle_rdma_comp(ioctx->ch, ioctx);
2279                                 break;
2280                         case IB_WC_RECV:
2281                                 srpt_handle_new_iu(ioctx->ch, ioctx);
2282                                 break;
2283                         default:
2284                                 break;
2285                         }
2286                         spin_lock_irq(&srpt_thread.thread_lock);
2287                 }
2288         }
2289         spin_unlock_irq(&srpt_thread.thread_lock);
2290
2291         return 0;
2292 }
2293
2294 /* SCST target template for the SRP target implementation. */
2295 static struct scst_tgt_template srpt_template = {
2296         .name = DRV_NAME,
2297         .sg_tablesize = SRPT_DEF_SG_TABLESIZE,
2298         .xmit_response_atomic = 1,
2299         .rdy_to_xfer_atomic = 1,
2300         .no_proc_entry = 1,
2301         .detect = srpt_detect,
2302         .release = srpt_release,
2303         .xmit_response = srpt_xmit_response,
2304         .rdy_to_xfer = srpt_rdy_to_xfer,
2305         .on_free_cmd = srpt_on_free_cmd,
2306         .task_mgmt_fn_done = srpt_tsk_mgmt_done
2307 };
2308
2309 /*
2310  * The callback function srpt_release_class_dev() is called whenever a
2311  * device is removed from the /sys/class/infiniband_srpt device class.
2312  */
2313 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2314 static void srpt_release_class_dev(struct class_device *class_dev)
2315 #else
2316 static void srpt_release_class_dev(struct device *dev)
2317 #endif
2318 {
2319 }
2320
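/*
 * Mapping between SCST trace flag bits and the labels accepted and shown by
 * the "trace_level" sysfs attribute.
 */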
2321 static const struct { int flag; const char *const label; }
2322         srpt_trace_label[] =
2323 {
2324         { TRACE_OUT_OF_MEM,     "out_of_mem"    },
2325         { TRACE_MINOR,          "minor"         },
2326         { TRACE_SG_OP,          "sg"            },
2327         { TRACE_MEMORY,         "mem"           },
2328         { TRACE_BUFF,           "buff"          },
2329         { TRACE_ENTRYEXIT,      "entryexit"     },
2330         { TRACE_PID,            "pid"           },
2331         { TRACE_LINE,           "line"          },
2332         { TRACE_FUNCTION,       "function"      },
2333         { TRACE_DEBUG,          "debug"         },
2334         { TRACE_SPECIAL,        "special"       },
2335         { TRACE_SCSI,           "scsi"          },
2336         { TRACE_MGMT,           "mgmt"          },
2337         { TRACE_MGMT_MINOR,     "mgmt_minor"    },
2338         { TRACE_MGMT_DEBUG,     "mgmt_dbg"      },
2339 };
2340
2341 /**
2342  * Convert a label into a trace flag. Consider exactly 'len' characters of
2343  * the label and ignore case. Return zero if no match has been found.
2344  */
2345 static unsigned long trace_label_to_flag(const char *const label, int len)
2346 {
2347         int i;
2348
2349         for (i = 0; i < ARRAY_SIZE(srpt_trace_label); i++)
2350                 if (strncasecmp(srpt_trace_label[i].label, label, len) == 0)
2351                         return srpt_trace_label[i].flag;
2352
2353         return 0;
2354 }
2355
2356 /**
2357  * Parse multiple tracing flags separated by whitespace. Return zero upon
2358  * error.
2359  */
2360 static unsigned long parse_flags(const char *buf, int count)
2361 {
2362         unsigned long result = 0;
2363         unsigned long flag;
2364         const char *p;
2365         const char *e;
2366
2367         for (p = buf; p < buf + count; p = e) {
2368                 while (p < buf + count && isspace(*p))
2369                         p++;
2370                 e = p;
2371                 while (e < buf + count && !isspace(*e))
2372                         e++;
2373                 if (e == p)
2374                         break;
2375                 flag = trace_label_to_flag(p, e - p);
2376                 if (!flag)
2377                         return 0;
2378                 result |= flag;
2379         }
2380         return result;
2381 }
2382
2383 /**
2384  * Convert a flag into a label. A flag is an integer with exactly one bit set.
2385  * Return NULL upon failure.
2386  */
2387 static const char *trace_flag_to_label(unsigned long flag)
2388 {
2389         int i;
2390
2391         if (flag == 0)
2392                 return NULL;
2393
2394         for (i = 0; i < ARRAY_SIZE(srpt_trace_label); i++)
2395                 if (srpt_trace_label[i].flag == flag)
2396                         return srpt_trace_label[i].label;
2397
2398         return NULL;
2399 }
2400
2401 /** sysfs function for showing the "trace_level" attribute. */
2402 static ssize_t srpt_show_trace_flags(struct class *class, char *buf)
2403 {
2404         int i;
2405         int first = 1;
2406
2407         if (trace_flag == 0) {
2408                 strcpy(buf, "none\n");
2409                 return strlen(buf);
2410         }
2411
2412         *buf = 0;
2413         for (i = 0; i < 8 * sizeof(trace_flag); i++) {
2414                 const char *label;
2415
2416                 label = trace_flag_to_label(trace_flag & (1UL << i));
2417                 if (label) {
2418                         if (first)
2419                                 first = 0;
2420                         else
2421                                 strcat(buf, " | ");
2422                         strcat(buf, label);
2423                 }
2424         }
2425         strcat(buf, "\n");
2426         return strlen(buf);
2427 }
2428
2429 /** sysfs function for storing the "trace_level" attribute. */
2430 static ssize_t srpt_store_trace_flags(struct class *class,
2431                                       const char *buf, size_t count)
2432 {
2433         unsigned long flags;
2434
2435         if (strncasecmp(buf, "all", 3) == 0)
2436                 trace_flag = TRACE_ALL;
2437         else if (strncasecmp(buf, "none", 4) == 0
2438                  || strncasecmp(buf, "null", 4) == 0) {
2439                 trace_flag = 0;
2440         } else if (strncasecmp(buf, "default", 7) == 0)
2441                 trace_flag = DEFAULT_SRPT_TRACE_FLAGS;
2442         else if (strncasecmp(buf, "set ", 4) == 0) {
2443                 flags = parse_flags(buf + 4, count - 4);
2444                 if (flags)
2445                         trace_flag = flags;
2446                 else
2447                         count = -EINVAL;
2448         } else if (strncasecmp(buf, "add ", 4) == 0) {
2449                 flags = parse_flags(buf + 4, count - 4);
2450                 if (flags)
2451                         trace_flag |= flags;
2452                 else
2453                         count = -EINVAL;
2454         } else if (strncasecmp(buf, "del ", 4) == 0) {
2455                 flags = parse_flags(buf + 4, count - 4);
2456                 if (flags)
2457                         trace_flag &= ~flags;
2458                 else
2459                         count = -EINVAL;
2460         } else if (strncasecmp(buf, "value ", 6) == 0)
2461                 trace_flag = simple_strtoul(buf + 6, NULL, 0);
2462         else
2463                 count = -EINVAL;
2464         return count;
2465 }
2466
2467 static struct class_attribute srpt_class_attrs[] = {
2468 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
2469         __ATTR(trace_level, 0600, srpt_show_trace_flags,
2470                srpt_store_trace_flags),
2471 #endif
2472         __ATTR_NULL,
2473 };
2474
2475 static struct class srpt_class = {
2476         .name = "infiniband_srpt",
2477 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2478         .release = srpt_release_class_dev,
2479 #else
2480         .dev_release = srpt_release_class_dev,
2481 #endif
2482         .class_attrs = srpt_class_attrs,
2483 };
2484
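/*
 * sysfs "login_info" attribute: prints, for each port of the HCA, the login
 * parameters (tid_ext, ioc_guid, pkey, dgid and service_id) an initiator
 * needs in order to establish a connection with this target.
 */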
2485 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2486 static ssize_t show_login_info(struct class_device *class_dev, char *buf)
2487 #else
2488 static ssize_t show_login_info(struct device *dev,
2489                                struct device_attribute *attr, char *buf)
2490 #endif
2491 {
2492         struct srpt_device *sdev =
2493 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2494                 container_of(class_dev, struct srpt_device, class_dev);
2495 #else
2496                 container_of(dev, struct srpt_device, dev);
2497 #endif
2498         struct srpt_port *sport;
2499         int i;
2500         int len = 0;
2501
2502         for (i = 0; i < sdev->device->phys_port_cnt; i++) {
2503                 sport = &sdev->port[i];
2504
2505                 len += sprintf(buf + len,
2506                                "tid_ext=%016llx,ioc_guid=%016llx,pkey=ffff,"
2507                                "dgid=%04x%04x%04x%04x%04x%04x%04x%04x,"
2508                                "service_id=%016llx\n",
2509                                (unsigned long long) mellanox_ioc_guid,
2510                                (unsigned long long) mellanox_ioc_guid,
2511                                be16_to_cpu(((__be16 *) sport->gid.raw)[0]),
2512                                be16_to_cpu(((__be16 *) sport->gid.raw)[1]),
2513                                be16_to_cpu(((__be16 *) sport->gid.raw)[2]),
2514                                be16_to_cpu(((__be16 *) sport->gid.raw)[3]),
2515                                be16_to_cpu(((__be16 *) sport->gid.raw)[4]),
2516                                be16_to_cpu(((__be16 *) sport->gid.raw)[5]),
2517                                be16_to_cpu(((__be16 *) sport->gid.raw)[6]),
2518                                be16_to_cpu(((__be16 *) sport->gid.raw)[7]),
2519                                (unsigned long long) mellanox_ioc_guid);
2520         }
2521
2522         return len;
2523 }
2524
2525 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2526 static CLASS_DEVICE_ATTR(login_info, S_IRUGO, show_login_info, NULL);
2527 #else
2528 static DEVICE_ATTR(login_info, S_IRUGO, show_login_info, NULL);
2529 #endif
2530
2531 /*
2532  * Callback function called by the InfiniBand core when either an InfiniBand
2533  * device has been added or during the ib_register_client() call for each
2534  * registered InfiniBand device.
2535  */
2536 static void srpt_add_one(struct ib_device *device)
2537 {
2538         struct srpt_device *sdev;
2539         struct srpt_port *sport;
2540         struct ib_srq_init_attr srq_attr;
2541         int i;
2542
2543         TRACE_ENTRY();
2544
2545         sdev = kzalloc(sizeof *sdev, GFP_KERNEL);
2546         if (!sdev)
2547                 return;
2548
2549         sdev->device = device;
2550
2551 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2552         sdev->class_dev.class = &srpt_class;
2553         sdev->class_dev.dev = device->dma_device;
2554         snprintf(sdev->class_dev.class_id, BUS_ID_SIZE,
2555                  "srpt-%s", device->name);
2556 #else
2557         sdev->dev.class = &srpt_class;
2558         sdev->dev.parent = device->dma_device;
2559 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30)
2560         snprintf(sdev->dev.bus_id, BUS_ID_SIZE, "srpt-%s", device->name);
2561 #else
2562         snprintf(sdev->init_name, sizeof(sdev->init_name),
2563                  "srpt-%s", device->name);
2564         sdev->dev.init_name = sdev->init_name;
2565 #endif
2566 #endif
2567
2568 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2569         if (class_device_register(&sdev->class_dev))
2570                 goto free_dev;
2571         if (class_device_create_file(&sdev->class_dev,
2572                                      &class_device_attr_login_info))
2573                 goto err_dev;
2574 #else
2575         if (device_register(&sdev->dev))
2576                 goto free_dev;
2577         if (device_create_file(&sdev->dev, &dev_attr_login_info))
2578                 goto err_dev;
2579 #endif
2580
2581         if (ib_query_device(device, &sdev->dev_attr))
2582                 goto err_dev;
2583
2584         sdev->pd = ib_alloc_pd(device);
2585         if (IS_ERR(sdev->pd))
2586                 goto err_dev;
2587
2588         sdev->mr = ib_get_dma_mr(sdev->pd, IB_ACCESS_LOCAL_WRITE);
2589         if (IS_ERR(sdev->mr))
2590                 goto err_pd;
2591
2592         srq_attr.event_handler = srpt_srq_event;
2593         srq_attr.srq_context = (void *)sdev;
2594         srq_attr.attr.max_wr = min(SRPT_SRQ_SIZE, sdev->dev_attr.max_srq_wr);
2595         srq_attr.attr.max_sge = 1;
2596         srq_attr.attr.srq_limit = 0;
2597
2598         sdev->srq = ib_create_srq(sdev->pd, &srq_attr);
2599         if (IS_ERR(sdev->srq))
2600                 goto err_mr;
2601
2602         printk(KERN_DEBUG PFX "%s: create SRQ #wr= %d max_allow=%d dev= %s\n",
2603                __func__, srq_attr.attr.max_wr,
2604               sdev->dev_attr.max_srq_wr, device->name);
2605
2606         if (!mellanox_ioc_guid)
2607                 mellanox_ioc_guid = be64_to_cpu(device->node_guid);
2608
2609         sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev);
2610         if (IS_ERR(sdev->cm_id))
2611                 goto err_srq;
2612
2613         /* print out target login information */
2614         printk(KERN_DEBUG PFX "Target login info: id_ext=%016llx,"
2615                 "ioc_guid=%016llx,pkey=ffff,service_id=%016llx\n",
2616                 (unsigned long long) mellanox_ioc_guid,
2617                 (unsigned long long) mellanox_ioc_guid,
2618                 (unsigned long long) mellanox_ioc_guid);
2619
2620         /*
2621          * We do not have a consistent service_id (i.e. also the id_ext of the
2622          * target_id) to identify this target. We currently use the GUID of the
2623          * first HCA in the system as the service_id; therefore the target_id
2624          * will change if this HCA goes bad and is replaced by a different HCA.
2625          */
2626         if (ib_cm_listen(sdev->cm_id, cpu_to_be64(mellanox_ioc_guid), 0, NULL))
2627                 goto err_cm;
2628
2629         INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device,
2630                               srpt_event_handler);
2631         if (ib_register_event_handler(&sdev->event_handler))
2632                 goto err_cm;
2633
2634         if (srpt_alloc_ioctx_ring(sdev))
2635                 goto err_event;
2636
2637         INIT_LIST_HEAD(&sdev->rch_list);
2638         spin_lock_init(&sdev->spinlock);
2639
2640         for (i = 0; i < SRPT_SRQ_SIZE; ++i)
2641                 srpt_post_recv(sdev, sdev->ioctx_ring[i]);
2642
2643         list_add_tail(&sdev->list, &srpt_devices);
2644
2645         ib_set_client_data(device, &srpt_client, sdev);
2646
2647         sdev->scst_tgt = scst_register(&srpt_template, NULL);
2648         if (!sdev->scst_tgt) {
2649                 printk(KERN_ERR PFX "SCST registration failed for %s.\n",
2650                         sdev->device->name);
2651                 goto err_ring;
2652         }
2653
2654         scst_tgt_set_tgt_priv(sdev->scst_tgt, sdev);
2655
2656         for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
2657                 sport = &sdev->port[i - 1];
2658                 sport->sdev = sdev;
2659                 sport->port = i;
2660 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2661                 /*
2662                  * A vanilla 2.6.19 or older kernel without backported OFED
2663                  * kernel headers.
2664                  */
2665                 INIT_WORK(&sport->work, srpt_refresh_port_work, sport);
2666 #else
2667                 INIT_WORK(&sport->work, srpt_refresh_port_work);
2668 #endif
2669                 if (srpt_refresh_port(sport)) {
2670                         printk(KERN_ERR PFX "MAD registration failed"
2671                                " for %s-%d.\n", sdev->device->name, i);
2672                         goto err_refresh_port;
2673                 }
2674         }
2675
2676         TRACE_EXIT();
2677
2678         return;
2679
2680 err_refresh_port:
2681         scst_unregister(sdev->scst_tgt);
2682 err_ring:
2683         ib_set_client_data(device, &srpt_client, NULL);
2684         list_del(&sdev->list);
2685         srpt_free_ioctx_ring(sdev);
2686 err_event:
2687         ib_unregister_event_handler(&sdev->event_handler);
2688 err_cm:
2689         ib_destroy_cm_id(sdev->cm_id);
2690 err_srq:
2691         ib_destroy_srq(sdev->srq);
2692 err_mr:
2693         ib_dereg_mr(sdev->mr);
2694 err_pd:
2695         ib_dealloc_pd(sdev->pd);
2696 err_dev:
2697 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2698         class_device_unregister(&sdev->class_dev);
2699 #else
2700         device_unregister(&sdev->dev);
2701 #endif
2702 free_dev:
2703         kfree(sdev);
2704
2705         TRACE_EXIT();
2706 }
2707
2708 /*
2709  * Callback function called by the InfiniBand core when either an InfiniBand
2710  * device has been removed or during the ib_unregister_client() call for each
2711  * registered InfiniBand device.
2712  */
2713 static void srpt_remove_one(struct ib_device *device)
2714 {
2715         int i;
2716         struct srpt_device *sdev;
2717
2718         TRACE_ENTRY();
2719
2720         sdev = ib_get_client_data(device, &srpt_client);
2721 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
2722         WARN_ON(!sdev);
2723         if (!sdev)
2724                 return;
2725 #else
2726         if (WARN_ON(!sdev))
2727                 return;
2728 #endif
2729
2730         /*
2731          * Cancel the work if it is queued. Wait until srpt_refresh_port_work()
2732          * finished if it is running.
2733          */
2734         for (i = 0; i < sdev->device->phys_port_cnt; i++)
2735                 cancel_work_sync(&sdev->port[i].work);
2736
2737         scst_unregister(sdev->scst_tgt);
2738         sdev->scst_tgt = NULL;
2739
2740         ib_unregister_event_handler(&sdev->event_handler);
2741         ib_destroy_cm_id(sdev->cm_id);
2742         ib_destroy_srq(sdev->srq);
2743         ib_dereg_mr(sdev->mr);
2744         ib_dealloc_pd(sdev->pd);
2745 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2746         class_device_unregister(&sdev->class_dev);
2747 #else
2748         device_unregister(&sdev->dev);
2749 #endif
2750
2751         srpt_free_ioctx_ring(sdev);
2752         list_del(&sdev->list);
2753         kfree(sdev);
2754
2755         TRACE_EXIT();
2756 }
2757
2758 /*
2759  * Module initialization.
2760  *
2761  * Note: since ib_register_client() registers callback functions, and since at
2762  * least one of these callback functions (srpt_add_one()) calls SCST functions,
2763  * the SCST target template must be registered before ib_register_client() is
2764  * called.
2765  */
2766 static int __init srpt_init_module(void)
2767 {
2768         int ret;
2769
2770         INIT_LIST_HEAD(&srpt_devices);
2771
2772         ret = class_register(&srpt_class);
2773         if (ret) {
2774                 printk(KERN_ERR PFX "couldn't register class ib_srpt\n");
2775                 return ret;
2776         }
2777
2778         ret = scst_register_target_template(&srpt_template);
2779         if (ret < 0) {
2780                 printk(KERN_ERR PFX "couldn't register with scst\n");
2781                 ret = -ENODEV;
2782                 goto mem_out;
2783         }
2784
2785         ret = ib_register_client(&srpt_client);
2786         if (ret) {
2787                 printk(KERN_ERR PFX "couldn't register IB client\n");
2788                 goto scst_out;
2789         }
2790
2791         if (thread) {
2792                 spin_lock_init(&srpt_thread.thread_lock);
2793                 INIT_LIST_HEAD(&srpt_thread.thread_ioctx_list);
2794                 srpt_thread.thread = kthread_run(srpt_ioctx_thread,
2795                                                  NULL, "srpt_thread");
2796                 if (IS_ERR(srpt_thread.thread)) {
2797                         srpt_thread.thread = NULL;
2798                         thread = 0;
2799                 }
2800         }
2801
2802         return 0;
2803
2804 scst_out:
2805         scst_unregister_target_template(&srpt_template);
2806 mem_out:
2807         class_unregister(&srpt_class);
2808         return ret;
2809 }
2810
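/*
 * Module cleanup: stop the ioctx kernel thread if it was started and
 * unregister from the InfiniBand core, from SCST and from sysfs.
 */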
2811 static void __exit srpt_cleanup_module(void)
2812 {
2813         TRACE_ENTRY();
2814
2815         if (srpt_thread.thread)
2816                 kthread_stop(srpt_thread.thread);
2817         ib_unregister_client(&srpt_client);
2818         scst_unregister_target_template(&srpt_template);
2819         class_unregister(&srpt_class);
2820
2821         TRACE_EXIT();
2822 }
2823
2824 module_init(srpt_init_module);
2825 module_exit(srpt_cleanup_module);