2 * Copyright (c) 2006 - 2009 Mellanox Technology Inc. All rights reserved.
3 * Copyright (C) 2008 Vladislav Bolkhovitin <vst@vlnb.net>
4 * Copyright (C) 2008 - 2009 Bart Van Assche <bart.vanassche@gmail.com>
6 * This software is available to you under a choice of one of two
7 * licenses. You may choose to be licensed under the terms of the GNU
8 * General Public License (GPL) Version 2, available from the file
9 * COPYING in the main directory of this source tree, or the
10 * OpenIB.org BSD license below:
12 * Redistribution and use in source and binary forms, with or
13 * without modification, are permitted provided that the following
16 * - Redistributions of source code must retain the above
17 * copyright notice, this list of conditions and the following
20 * - Redistributions in binary form must reproduce the above
21 * copyright notice, this list of conditions and the following
22 * disclaimer in the documentation and/or other materials
23 * provided with the distribution.
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
36 #include <linux/module.h>
37 #include <linux/init.h>
38 #include <linux/slab.h>
39 #include <linux/err.h>
40 #include <linux/ctype.h>
41 #include <linux/string.h>
42 #include <linux/kthread.h>
44 #include <asm/atomic.h>
47 #include "scst_debug.h"
49 /* Name of this kernel module. */
50 #define DRV_NAME "ib_srpt"
51 /* Prefix for printk() kernel messages. */
52 #define PFX DRV_NAME ": "
53 #define DRV_VERSION "1.0.1"
54 #define DRV_RELDATE "July 10, 2008"
55 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
56 /* Flags to be used in SCST debug tracing statements. */
57 #define DEFAULT_SRPT_TRACE_FLAGS (TRACE_OUT_OF_MEM | TRACE_MINOR \
58 | TRACE_MGMT | TRACE_SPECIAL)
61 #define MELLANOX_SRPT_ID_STRING "Mellanox OFED SRP target"
63 MODULE_AUTHOR("Vu Pham");
64 MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol target "
65 "v" DRV_VERSION " (" DRV_RELDATE ")");
66 MODULE_LICENSE("Dual BSD/GPL");
69 /* Protects thread_ioctx_list. */
70 spinlock_t thread_lock;
71 /* I/O contexts to be processed by the kernel thread. */
72 struct list_head thread_ioctx_list;
73 /* SRPT kernel thread. */
74 struct task_struct *thread;
81 static u64 mellanox_ioc_guid;
82 /* List of srpt_device structures. */
83 static struct list_head srpt_devices;
85 static struct srpt_thread srpt_thread;
86 static DECLARE_WAIT_QUEUE_HEAD(ioctx_list_waitQ);
87 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
88 static unsigned long trace_flag = DEFAULT_SRPT_TRACE_FLAGS;
89 module_param(trace_flag, long, 0644);
90 MODULE_PARM_DESC(trace_flag,
91 "Trace flags for the ib_srpt kernel module.");
94 module_param(thread, int, 0444);
95 MODULE_PARM_DESC(thread,
96 "Executing ioctx in thread context. Default 0, i.e. soft IRQ, "
99 static void srpt_add_one(struct ib_device *device);
100 static void srpt_remove_one(struct ib_device *device);
101 static int srpt_disconnect_channel(struct srpt_rdma_ch *ch, int dreq);
102 static void srpt_unregister_mad_agent(struct srpt_device *sdev);
104 static struct ib_client srpt_client = {
107 .remove = srpt_remove_one
111 * Callback function called by the InfiniBand core when an asynchronous IB
112 * event occurs. This callback may occur in interrupt context. See also
113 * section 11.5.2, Set Asynchronous Event Handler in the InfiniBand
114 * Architecture Specification.
116 static void srpt_event_handler(struct ib_event_handler *handler,
117 struct ib_event *event)
119 struct srpt_device *sdev =
120 ib_get_client_data(event->device, &srpt_client);
121 struct srpt_port *sport;
123 if (!sdev || sdev->device != event->device)
126 printk(KERN_WARNING PFX "ASYNC event= %d on device= %s\n",
127 event->event, sdev->device->name);
129 switch (event->event) {
130 case IB_EVENT_PORT_ERR:
131 if (event->element.port_num <= sdev->device->phys_port_cnt) {
132 sport = &sdev->port[event->element.port_num - 1];
137 case IB_EVENT_PORT_ACTIVE:
138 case IB_EVENT_LID_CHANGE:
139 case IB_EVENT_PKEY_CHANGE:
140 case IB_EVENT_SM_CHANGE:
141 case IB_EVENT_CLIENT_REREGISTER:
143 * Refresh port data asynchronously. Note: it is safe to call
144 * schedule_work() even if &sport->work is already on the
145 * global workqueue because schedule_work() tests for the
146 * work_pending() condition before adding &sport->work to the global workqueue.
149 if (event->element.port_num <= sdev->device->phys_port_cnt) {
150 sport = &sdev->port[event->element.port_num - 1];
151 if (!sport->lid && !sport->sm_lid)
152 schedule_work(&sport->work);
162 * Callback function called by the InfiniBand core for SRQ (shared receive queue) events.
165 static void srpt_srq_event(struct ib_event *event, void *ctx)
167 printk(KERN_WARNING PFX "SRQ event %d\n", event->event);
171 * Callback function called by the InfiniBand core for QP (queue pair) events.
173 static void srpt_qp_event(struct ib_event *event, void *ctx)
175 struct srpt_rdma_ch *ch = ctx;
177 printk(KERN_WARNING PFX
178 "QP event %d on cm_id=%p sess_name=%s state=%d\n",
179 event->event, ch->cm_id, ch->sess_name, ch->state);
181 switch (event->event) {
182 case IB_EVENT_COMM_EST:
183 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 20) || defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
184 ib_cm_notify(ch->cm_id, event->event);
186 /* Vanilla 2.6.19 kernel (or before) without OFED. */
187 printk(KERN_ERR PFX "ib_cm_notify() is not available on"
188 " vanilla kernels older than 2.6.20\n");
191 case IB_EVENT_QP_LAST_WQE_REACHED:
192 if (ch->state == RDMA_CHANNEL_LIVE) {
193 printk(KERN_WARNING PFX
194 "Schedule CM_DISCONNECT_WORK\n");
195 srpt_disconnect_channel(ch, 1);
204 * Helper function for filling in an InfiniBand IOUnitInfo structure. Copies
205 * the lowest four bits of 'value' into element 'slot' of the four-bit
206 * element array c_list (controller list). The index 'slot' is one-based.
208 * @pre 1 <= slot && 0 <= value && value < 16
210 static void srpt_set_ioc(u8 *c_list, u32 slot, u8 value)
217 tmp = c_list[id] & 0xf;
218 c_list[id] = (value << 4) | tmp;
220 tmp = c_list[id] & 0xf0;
221 c_list[id] = (value & 0xf) | tmp;
226 * Write InfiniBand ClassPortInfo to mad. See also section 16.3.3.1
227 * ClassPortInfo in the InfiniBand Architecture Specification.
229 static void srpt_get_class_port_info(struct ib_dm_mad *mad)
231 struct ib_class_port_info *cif;
233 cif = (struct ib_class_port_info *)mad->data;
234 memset(cif, 0, sizeof *cif);
235 cif->base_version = 1;
236 cif->class_version = 1;
237 cif->resp_time_value = 20;
239 mad->mad_hdr.status = 0;
243 * Write IOUnitInfo to mad. See also section 16.3.3.3 IOUnitInfo in the
244 * InfiniBand Architecture Specification. See also section B.7,
245 * table B.6 in the T10 SRP r16a document.
247 static void srpt_get_iou(struct ib_dm_mad *mad)
249 struct ib_dm_iou_info *ioui;
253 ioui = (struct ib_dm_iou_info *)mad->data;
255 ioui->max_controllers = 16;
257 /* set present for slot 1 and empty for the rest */
258 srpt_set_ioc(ioui->controller_list, 1, 1);
259 for (i = 1, slot = 2; i < 16; i++, slot++)
260 srpt_set_ioc(ioui->controller_list, slot, 0);
262 mad->mad_hdr.status = 0;
266 * Write IOControllerProfile to mad for I/O controller (sdev, slot). See also
267 * section 16.3.3.4 IOControllerProfile in the InfiniBand Architecture
268 * Specification. See also section B.7, table B.7 in the T10 SRP r16a document.
271 static void srpt_get_ioc(struct srpt_device *sdev, u32 slot,
272 struct ib_dm_mad *mad)
274 struct ib_dm_ioc_profile *iocp;
276 iocp = (struct ib_dm_ioc_profile *)mad->data;
278 if (!slot || slot > 16) {
279 mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
284 mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
288 memset(iocp, 0, sizeof *iocp);
289 strcpy(iocp->id_string, MELLANOX_SRPT_ID_STRING);
290 iocp->guid = cpu_to_be64(mellanox_ioc_guid);
291 iocp->vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
292 iocp->device_id = cpu_to_be32(sdev->dev_attr.vendor_part_id);
293 iocp->device_version = cpu_to_be16(sdev->dev_attr.hw_ver);
294 iocp->subsys_vendor_id = cpu_to_be32(sdev->dev_attr.vendor_id);
295 iocp->subsys_device_id = 0x0;
296 iocp->io_class = cpu_to_be16(SRP_REV16A_IB_IO_CLASS);
297 iocp->io_subclass = cpu_to_be16(SRP_IO_SUBCLASS);
298 iocp->protocol = cpu_to_be16(SRP_PROTOCOL);
299 iocp->protocol_version = cpu_to_be16(SRP_PROTOCOL_VERSION);
300 iocp->send_queue_depth = cpu_to_be16(SRPT_SRQ_SIZE);
301 iocp->rdma_read_depth = 4;
302 iocp->send_size = cpu_to_be32(MAX_MESSAGE_SIZE);
303 iocp->rdma_size = cpu_to_be32(MAX_RDMA_SIZE);
304 iocp->num_svc_entries = 1;
305 iocp->op_cap_mask = SRP_SEND_TO_IOC | SRP_SEND_FROM_IOC |
306 SRP_RDMA_READ_FROM_IOC | SRP_RDMA_WRITE_FROM_IOC;
308 mad->mad_hdr.status = 0;
312 * Device management: write ServiceEntries to mad for the given slot. See also
313 * section 16.3.3.5 ServiceEntries in the InfiniBand Architecture
314 * Specification. See also section B.7, table B.8 in the T10 SRP r16a document.
316 static void srpt_get_svc_entries(u16 slot, u8 hi, u8 lo, struct ib_dm_mad *mad)
318 struct ib_dm_svc_entries *svc_entries;
320 if (!slot || slot > 16) {
321 mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_INVALID_FIELD);
325 if (slot > 2 || lo > hi || hi > 1) {
326 mad->mad_hdr.status = cpu_to_be16(DM_MAD_STATUS_NO_IOC);
330 svc_entries = (struct ib_dm_svc_entries *)mad->data;
331 memset(svc_entries, 0, sizeof *svc_entries);
332 svc_entries->service_entries[0].id = cpu_to_be64(mellanox_ioc_guid);
333 sprintf(svc_entries->service_entries[0].name, "%s%016llx",
334 SRP_SERVICE_NAME_PREFIX, (unsigned long long)mellanox_ioc_guid);
336 mad->mad_hdr.status = 0;
340 * Actual processing of the MAD *rq_mad received through source port *sp
341 * (MAD = InfiniBand management datagram). The response to be sent back is
342 * written to *rsp_mad.
344 static void srpt_mgmt_method_get(struct srpt_port *sp, struct ib_mad *rq_mad,
345 struct ib_dm_mad *rsp_mad)
351 attr_id = be16_to_cpu(rq_mad->mad_hdr.attr_id);
353 case DM_ATTR_CLASS_PORT_INFO:
354 srpt_get_class_port_info(rsp_mad);
356 case DM_ATTR_IOU_INFO:
357 srpt_get_iou(rsp_mad);
359 case DM_ATTR_IOC_PROFILE:
360 slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
361 srpt_get_ioc(sp->sdev, slot, rsp_mad);
363 case DM_ATTR_SVC_ENTRIES:
364 slot = be32_to_cpu(rq_mad->mad_hdr.attr_mod);
365 hi = (u8) ((slot >> 8) & 0xff);
366 lo = (u8) (slot & 0xff);
367 slot = (u16) ((slot >> 16) & 0xffff);
368 srpt_get_svc_entries(slot, hi, lo, rsp_mad);
371 rsp_mad->mad_hdr.status =
372 cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
378 * Callback function that is called by the InfiniBand core after transmission of
379 * a MAD. (MAD = management datagram; AH = address handle.)
381 static void srpt_mad_send_handler(struct ib_mad_agent *mad_agent,
382 struct ib_mad_send_wc *mad_wc)
384 ib_destroy_ah(mad_wc->send_buf->ah);
385 ib_free_send_mad(mad_wc->send_buf);
389 * Callback function that is called by the InfiniBand core after reception of
390 * a MAD (management datagram).
392 static void srpt_mad_recv_handler(struct ib_mad_agent *mad_agent,
393 struct ib_mad_recv_wc *mad_wc)
395 struct srpt_port *sport = (struct srpt_port *)mad_agent->context;
397 struct ib_mad_send_buf *rsp;
398 struct ib_dm_mad *dm_mad;
400 if (!mad_wc || !mad_wc->recv_buf.mad)
403 ah = ib_create_ah_from_wc(mad_agent->qp->pd, mad_wc->wc,
404 mad_wc->recv_buf.grh, mad_agent->port_num);
408 BUILD_BUG_ON(offsetof(struct ib_dm_mad, data) != IB_MGMT_DEVICE_HDR);
410 rsp = ib_create_send_mad(mad_agent, mad_wc->wc->src_qp,
411 mad_wc->wc->pkey_index, 0,
412 IB_MGMT_DEVICE_HDR, IB_MGMT_DEVICE_DATA,
420 memcpy(dm_mad, mad_wc->recv_buf.mad, sizeof *dm_mad);
421 dm_mad->mad_hdr.method = IB_MGMT_METHOD_GET_RESP;
422 dm_mad->mad_hdr.status = 0;
424 switch (mad_wc->recv_buf.mad->mad_hdr.method) {
425 case IB_MGMT_METHOD_GET:
426 srpt_mgmt_method_get(sport, mad_wc->recv_buf.mad, dm_mad);
428 case IB_MGMT_METHOD_SET:
429 dm_mad->mad_hdr.status =
430 cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD_ATTR);
433 dm_mad->mad_hdr.status =
434 cpu_to_be16(DM_MAD_STATUS_UNSUP_METHOD);
438 if (!ib_post_send_mad(rsp, NULL)) {
439 ib_free_recv_mad(mad_wc);
440 /* will destroy_ah & free_send_mad in send completion */
444 ib_free_send_mad(rsp);
449 ib_free_recv_mad(mad_wc);
453 * Enable InfiniBand management datagram processing, update the cached sm_lid,
454 * lid and gid values, and register a callback function for processing MADs
455 * on the specified port. It is safe to call this function more than once for the same port.
458 static int srpt_refresh_port(struct srpt_port *sport)
460 struct ib_mad_reg_req reg_req;
461 struct ib_port_modify port_modify;
462 struct ib_port_attr port_attr;
465 memset(&port_modify, 0, sizeof port_modify);
466 port_modify.set_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
467 port_modify.clr_port_cap_mask = 0;
469 ret = ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
473 ret = ib_query_port(sport->sdev->device, sport->port, &port_attr);
477 sport->sm_lid = port_attr.sm_lid;
478 sport->lid = port_attr.lid;
480 ret = ib_query_gid(sport->sdev->device, sport->port, 0, &sport->gid);
484 if (!sport->mad_agent) {
485 memset(&reg_req, 0, sizeof reg_req);
486 reg_req.mgmt_class = IB_MGMT_CLASS_DEVICE_MGMT;
487 reg_req.mgmt_class_version = IB_MGMT_BASE_VERSION;
488 set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
489 set_bit(IB_MGMT_METHOD_SET, reg_req.method_mask);
491 sport->mad_agent = ib_register_mad_agent(sport->sdev->device,
495 srpt_mad_send_handler,
496 srpt_mad_recv_handler,
498 if (IS_ERR(sport->mad_agent)) {
499 ret = PTR_ERR(sport->mad_agent);
500 sport->mad_agent = NULL;
509 port_modify.set_port_cap_mask = 0;
510 port_modify.clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP;
511 ib_modify_port(sport->sdev->device, sport->port, 0, &port_modify);
519 * Unregister the callback function for processing MADs and disable MAD
520 * processing for all ports of the specified device. It is safe to call this
521 * function more than once for the same device.
523 static void srpt_unregister_mad_agent(struct srpt_device *sdev)
525 struct ib_port_modify port_modify = {
526 .clr_port_cap_mask = IB_PORT_DEVICE_MGMT_SUP,
528 struct srpt_port *sport;
531 for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
532 sport = &sdev->port[i - 1];
533 WARN_ON(sport->port != i);
534 if (ib_modify_port(sdev->device, i, 0, &port_modify) < 0)
535 printk(KERN_ERR PFX "disabling MAD processing"
537 if (sport->mad_agent) {
538 ib_unregister_mad_agent(sport->mad_agent);
539 sport->mad_agent = NULL;
545 * Allocate and initialize an SRPT I/O context structure.
547 static struct srpt_ioctx *srpt_alloc_ioctx(struct srpt_device *sdev)
549 struct srpt_ioctx *ioctx;
551 ioctx = kmalloc(sizeof *ioctx, GFP_KERNEL);
555 ioctx->buf = kzalloc(MAX_MESSAGE_SIZE, GFP_KERNEL);
559 ioctx->dma = dma_map_single(sdev->device->dma_device, ioctx->buf,
560 MAX_MESSAGE_SIZE, DMA_BIDIRECTIONAL);
561 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)
562 if (dma_mapping_error(sdev->device->dma_device, ioctx->dma))
564 if (dma_mapping_error(ioctx->dma))
579 * Deallocate an SRPT I/O context structure.
581 static void srpt_free_ioctx(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
586 dma_unmap_single(sdev->device->dma_device, ioctx->dma,
587 MAX_MESSAGE_SIZE, DMA_BIDIRECTIONAL);
593 * Associate a ring of SRPT I/O context structures with the specified device.
595 static int srpt_alloc_ioctx_ring(struct srpt_device *sdev)
599 for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
600 sdev->ioctx_ring[i] = srpt_alloc_ioctx(sdev);
602 if (!sdev->ioctx_ring[i])
605 sdev->ioctx_ring[i]->index = i;
612 srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
613 sdev->ioctx_ring[i] = NULL;
618 /* Free the ring of SRPT I/O context structures. */
619 static void srpt_free_ioctx_ring(struct srpt_device *sdev)
623 for (i = 0; i < SRPT_SRQ_SIZE; ++i) {
624 srpt_free_ioctx(sdev, sdev->ioctx_ring[i]);
625 sdev->ioctx_ring[i] = NULL;
630 * Post a receive request on the shared receive queue (SRQ) of InfiniBand device 'sdev'.
632 static int srpt_post_recv(struct srpt_device *sdev, struct srpt_ioctx *ioctx)
635 struct ib_recv_wr wr, *bad_wr;
637 wr.wr_id = ioctx->index | SRPT_OP_RECV;
639 list.addr = ioctx->dma;
640 list.length = MAX_MESSAGE_SIZE;
641 list.lkey = sdev->mr->lkey;
647 return ib_post_srq_recv(sdev->srq, &wr, &bad_wr);
651 * Post a send request on the SRPT RDMA channel 'ch'.
653 static int srpt_post_send(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
657 struct ib_send_wr wr, *bad_wr;
658 struct srpt_device *sdev = ch->sport->sdev;
660 dma_sync_single_for_device(sdev->device->dma_device, ioctx->dma,
661 MAX_MESSAGE_SIZE, DMA_TO_DEVICE);
663 list.addr = ioctx->dma;
665 list.lkey = sdev->mr->lkey;
668 wr.wr_id = ioctx->index;
671 wr.opcode = IB_WR_SEND;
672 wr.send_flags = IB_SEND_SIGNALED;
674 return ib_post_send(ch->qp, &wr, &bad_wr);
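/*
 * Decode the data buffer descriptors of SRP_CMD information unit *srp_cmd:
 * for the direct format, copy the single descriptor into ioctx->single_rbuf;
 * for the indirect format, copy the descriptor table into ioctx->rbufs.
 * Also store the total data length in ioctx->data_len.
 */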
677 static int srpt_get_desc_tbl(struct srpt_ioctx *ioctx, struct srp_cmd *srp_cmd,
680 struct srp_indirect_buf *idb;
681 struct srp_direct_buf *db;
684 if (((srp_cmd->buf_fmt & 0xf) == SRP_DATA_DESC_DIRECT) ||
685 ((srp_cmd->buf_fmt >> 4) == SRP_DATA_DESC_DIRECT)) {
687 ioctx->rbufs = &ioctx->single_rbuf;
689 db = (void *)srp_cmd->add_data;
690 memcpy(ioctx->rbufs, db, sizeof *db);
691 ioctx->data_len = be32_to_cpu(db->len);
693 idb = (void *)srp_cmd->add_data;
695 ioctx->n_rbuf = be32_to_cpu(idb->table_desc.len) / sizeof *db;
698 (srp_cmd->data_out_desc_cnt + srp_cmd->data_in_desc_cnt)) {
704 if (ioctx->n_rbuf == 1)
705 ioctx->rbufs = &ioctx->single_rbuf;
708 kmalloc(ioctx->n_rbuf * sizeof *db, GFP_ATOMIC);
715 memcpy(ioctx->rbufs, db, ioctx->n_rbuf * sizeof *db);
716 ioctx->data_len = be32_to_cpu(idb->len);
723 * Modify the attributes of queue pair 'qp': allow local write, remote read,
724 * and remote write. Also transition 'qp' to state IB_QPS_INIT.
726 static int srpt_init_ch_qp(struct srpt_rdma_ch *ch, struct ib_qp *qp)
728 struct ib_qp_attr *attr;
731 attr = kzalloc(sizeof *attr, GFP_KERNEL);
735 attr->qp_state = IB_QPS_INIT;
736 attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ |
737 IB_ACCESS_REMOTE_WRITE;
738 attr->port_num = ch->sport->port;
739 attr->pkey_index = 0;
741 ret = ib_modify_qp(qp, attr,
742 IB_QP_STATE | IB_QP_ACCESS_FLAGS | IB_QP_PORT |
749 static int srpt_ch_qp_rtr_rts(struct srpt_rdma_ch *ch, struct ib_qp *qp,
750 enum ib_qp_state qp_state)
752 struct ib_qp_attr *qp_attr;
756 qp_attr = kmalloc(sizeof *qp_attr, GFP_KERNEL);
760 qp_attr->qp_state = qp_state;
761 ret = ib_cm_init_qp_attr(ch->cm_id, qp_attr, &attr_mask);
765 if (qp_state == IB_QPS_RTR)
766 qp_attr->max_dest_rd_atomic = 4;
768 qp_attr->max_rd_atomic = 4;
770 ret = ib_modify_qp(qp, qp_attr, attr_mask);
777 static void srpt_reset_ioctx(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
781 if (ioctx->n_rdma_ius > 0 && ioctx->rdma_ius) {
782 struct rdma_iu *riu = ioctx->rdma_ius;
784 for (i = 0; i < ioctx->n_rdma_ius; ++i, ++riu)
786 kfree(ioctx->rdma_ius);
789 if (ioctx->n_rbuf > 1)
792 if (srpt_post_recv(ch->sport->sdev, ioctx))
793 printk(KERN_ERR PFX "SRQ post_recv failed - this is serious\n");
794 /* we should queue it back to free_ioctx queue */
796 atomic_inc(&ch->req_lim_delta);
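/*
 * Process a work completion that finished with an error status. If an SCST
 * command is associated with the completion, unmap its data buffers (if any)
 * and finish or abort it depending on its state, then repost the I/O context
 * via srpt_reset_ioctx().
 */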
799 static void srpt_handle_err_comp(struct srpt_rdma_ch *ch, struct ib_wc *wc)
801 struct srpt_ioctx *ioctx;
802 struct srpt_device *sdev = ch->sport->sdev;
803 scst_data_direction dir = SCST_DATA_NONE;
805 if (wc->wr_id & SRPT_OP_RECV) {
806 ioctx = sdev->ioctx_ring[wc->wr_id & ~SRPT_OP_RECV];
807 printk(KERN_ERR PFX "This is serious - SRQ is in bad state\n");
809 ioctx = sdev->ioctx_ring[wc->wr_id];
812 struct scst_cmd *scmnd = ioctx->scmnd;
814 dir = scst_cmd_get_data_direction(scmnd);
816 if (dir == SCST_DATA_NONE)
817 scst_tgt_cmd_done(scmnd,
818 scst_estimate_context());
820 dma_unmap_sg(sdev->device->dma_device,
821 scst_cmd_get_sg(scmnd),
822 scst_cmd_get_sg_cnt(scmnd),
823 scst_to_tgt_dma_dir(dir));
825 if (scmnd->state == SCST_CMD_STATE_DATA_WAIT)
827 SCST_RX_STATUS_ERROR,
828 SCST_CONTEXT_THREAD);
829 else if (scmnd->state == SCST_CMD_STATE_XMIT_WAIT)
830 scst_tgt_cmd_done(scmnd,
831 scst_estimate_context());
834 srpt_reset_ioctx(ch, ioctx);
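/*
 * Process the completion of an SRP_RSP send: unmap the data buffers of the
 * associated SCST command, tell the SCST core that command processing has
 * finished and repost the I/O context.
 */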
838 static void srpt_handle_send_comp(struct srpt_rdma_ch *ch,
839 struct srpt_ioctx *ioctx,
840 enum scst_exec_context context)
843 scst_data_direction dir =
844 scst_cmd_get_data_direction(ioctx->scmnd);
846 if (dir != SCST_DATA_NONE)
847 dma_unmap_sg(ch->sport->sdev->device->dma_device,
848 scst_cmd_get_sg(ioctx->scmnd),
849 scst_cmd_get_sg_cnt(ioctx->scmnd),
850 scst_to_tgt_dma_dir(dir));
852 scst_tgt_cmd_done(ioctx->scmnd, context);
854 srpt_reset_ioctx(ch, ioctx);
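/*
 * Process the completion of an RDMA read or write. For an SCST_DATA_WRITE
 * command this notifies the SCST core that all data has been received from
 * the initiator.
 */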
857 static void srpt_handle_rdma_comp(struct srpt_rdma_ch *ch,
858 struct srpt_ioctx *ioctx)
861 srpt_reset_ioctx(ch, ioctx);
865 if (scst_cmd_get_data_direction(ioctx->scmnd) == SCST_DATA_WRITE)
866 scst_rx_data(ioctx->scmnd, SCST_RX_STATUS_SUCCESS,
867 scst_estimate_context());
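/*
 * Build an SRP_RSP response in ioctx->buf for a SCSI command: fill in the
 * current request limit delta and, if the sense key is not NO_SENSE, append
 * fixed-format sense data and set the CHECK CONDITION status.
 */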
870 static void srpt_build_cmd_rsp(struct srpt_rdma_ch *ch,
871 struct srpt_ioctx *ioctx, u8 s_key, u8 s_code,
874 struct srp_rsp *srp_rsp;
875 struct sense_data *sense;
878 srp_rsp = ioctx->buf;
879 memset(srp_rsp, 0, sizeof *srp_rsp);
881 limit_delta = atomic_read(&ch->req_lim_delta);
882 atomic_sub(limit_delta, &ch->req_lim_delta);
884 srp_rsp->opcode = SRP_RSP;
885 srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
888 if (s_key != NO_SENSE) {
889 srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
890 srp_rsp->status = SAM_STAT_CHECK_CONDITION;
891 srp_rsp->sense_data_len =
892 cpu_to_be32(sizeof *sense + (sizeof *sense % 4));
894 sense = (struct sense_data *)(srp_rsp + 1);
895 sense->err_code = 0x70;
897 sense->asc_ascq = s_code;
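/*
 * Build an SRP_RSP response in ioctx->buf for a task management function.
 * If rsp_code indicates failure, four bytes of response data holding that
 * code are appended to the response.
 */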
901 static void srpt_build_tskmgmt_rsp(struct srpt_rdma_ch *ch,
902 struct srpt_ioctx *ioctx, u8 rsp_code,
905 struct srp_rsp *srp_rsp;
908 dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
909 MAX_MESSAGE_SIZE, DMA_TO_DEVICE);
911 srp_rsp = ioctx->buf;
912 memset(srp_rsp, 0, sizeof *srp_rsp);
914 limit_delta = atomic_read(&ch->req_lim_delta);
915 atomic_sub(limit_delta, &ch->req_lim_delta);
917 srp_rsp->opcode = SRP_RSP;
918 srp_rsp->req_lim_delta = cpu_to_be32(limit_delta);
921 if (rsp_code != SRP_TSK_MGMT_SUCCESS) {
922 srp_rsp->flags |= SRP_RSP_FLAG_RSPVALID;
923 srp_rsp->resp_data_len = cpu_to_be32(4);
924 srp_rsp->data[3] = rsp_code;
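/*
 * Process a received SRP_CMD information unit: decode its data descriptors,
 * create an SCST command via scst_rx_cmd(), set the queue type, tag and
 * expected data transfer parameters, add the command to the channel's
 * active command list and pass it to the SCST core.
 */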
931 static int srpt_handle_cmd(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx)
933 struct scst_cmd *scmnd = NULL;
934 struct srp_cmd *srp_cmd = NULL;
935 scst_data_direction dir = SCST_DATA_NONE;
936 int indirect_desc = 0;
940 srp_cmd = ioctx->buf;
942 if (srp_cmd->buf_fmt) {
943 ret = srpt_get_desc_tbl(ioctx, srp_cmd, &indirect_desc);
945 srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
946 NO_ADD_SENSE, srp_cmd->tag);
947 ((struct srp_rsp *)ioctx->buf)->status =
948 SAM_STAT_TASK_SET_FULL;
953 srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
954 NO_ADD_SENSE, srp_cmd->tag);
955 ((struct srp_rsp *)ioctx->buf)->status =
956 SAM_STAT_TASK_SET_FULL;
960 if (srp_cmd->buf_fmt & 0xf)
961 dir = SCST_DATA_READ;
962 else if (srp_cmd->buf_fmt >> 4)
963 dir = SCST_DATA_WRITE;
965 dir = SCST_DATA_NONE;
967 dir = SCST_DATA_NONE;
969 scmnd = scst_rx_cmd(ch->scst_sess, (u8 *) &srp_cmd->lun,
970 sizeof srp_cmd->lun, srp_cmd->cdb, 16,
971 thread ? SCST_NON_ATOMIC : SCST_ATOMIC);
973 srpt_build_cmd_rsp(ch, ioctx, NO_SENSE,
974 NO_ADD_SENSE, srp_cmd->tag);
975 ((struct srp_rsp *)ioctx->buf)->status =
976 SAM_STAT_TASK_SET_FULL;
980 ioctx->scmnd = scmnd;
982 switch (srp_cmd->task_attr) {
983 case SRP_CMD_HEAD_OF_Q:
984 scmnd->queue_type = SCST_CMD_QUEUE_HEAD_OF_QUEUE;
986 case SRP_CMD_ORDERED_Q:
987 scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
989 case SRP_CMD_SIMPLE_Q:
990 scmnd->queue_type = SCST_CMD_QUEUE_SIMPLE;
993 scmnd->queue_type = SCST_CMD_QUEUE_ACA;
996 scmnd->queue_type = SCST_CMD_QUEUE_ORDERED;
1000 scst_cmd_set_tag(scmnd, srp_cmd->tag);
1001 scst_cmd_set_tgt_priv(scmnd, ioctx);
1002 scst_cmd_set_expected(scmnd, dir, ioctx->data_len);
1004 spin_lock_irqsave(&ch->spinlock, flags);
1005 list_add_tail(&ioctx->scmnd_list, &ch->active_scmnd_list);
1006 ch->active_scmnd_cnt++;
1007 spin_unlock_irqrestore(&ch->spinlock, flags);
1009 scst_cmd_init_done(scmnd, scst_estimate_context());
1018 * Process SRP_TSK_MGMT. See also table 19 in the T10 SRP r16a document.
1020 static int srpt_handle_tsk_mgmt(struct srpt_rdma_ch *ch,
1021 struct srpt_ioctx *ioctx)
1023 struct srp_tsk_mgmt *srp_tsk = NULL;
1024 struct srpt_mgmt_ioctx *mgmt_ioctx;
1027 srp_tsk = ioctx->buf;
1029 printk(KERN_WARNING PFX
1030 "recv_tsk_mgmt= %d for task_tag= %lld"
1031 " using tag= %lld cm_id= %p sess= %p\n",
1032 srp_tsk->tsk_mgmt_func,
1033 (unsigned long long) srp_tsk->task_tag,
1034 (unsigned long long) srp_tsk->tag,
1035 ch->cm_id, ch->scst_sess);
1037 mgmt_ioctx = kmalloc(sizeof *mgmt_ioctx, GFP_ATOMIC);
1039 srpt_build_tskmgmt_rsp(ch, ioctx, SRP_TSK_MGMT_FAILED,
1044 mgmt_ioctx->ioctx = ioctx;
1045 mgmt_ioctx->ch = ch;
1046 mgmt_ioctx->tag = srp_tsk->tag;
1048 switch (srp_tsk->tsk_mgmt_func) {
1049 case SRP_TSK_ABORT_TASK:
1050 ret = scst_rx_mgmt_fn_tag(ch->scst_sess,
1054 SCST_NON_ATOMIC : SCST_ATOMIC,
1057 case SRP_TSK_ABORT_TASK_SET:
1058 ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
1059 SCST_ABORT_TASK_SET,
1060 (u8 *) &srp_tsk->lun,
1061 sizeof srp_tsk->lun,
1063 SCST_NON_ATOMIC : SCST_ATOMIC,
1066 case SRP_TSK_CLEAR_TASK_SET:
1067 ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
1068 SCST_CLEAR_TASK_SET,
1069 (u8 *) &srp_tsk->lun,
1070 sizeof srp_tsk->lun,
1072 SCST_NON_ATOMIC : SCST_ATOMIC,
1076 case SRP_TSK_LUN_RESET:
1077 ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
1079 (u8 *) &srp_tsk->lun,
1080 sizeof srp_tsk->lun,
1082 SCST_NON_ATOMIC : SCST_ATOMIC,
1086 case SRP_TSK_CLEAR_ACA:
1087 ret = scst_rx_mgmt_fn_lun(ch->scst_sess,
1089 (u8 *) &srp_tsk->lun,
1090 sizeof srp_tsk->lun,
1092 SCST_NON_ATOMIC : SCST_ATOMIC,
1096 srpt_build_tskmgmt_rsp(ch, ioctx,
1097 SRP_TSK_MGMT_FUNC_NOT_SUPP,
1107 static void srpt_handle_new_iu(struct srpt_rdma_ch *ch,
1108 struct srpt_ioctx *ioctx)
1111 unsigned long flags;
1113 if (ch->state != RDMA_CHANNEL_LIVE) {
1114 if (ch->state == RDMA_CHANNEL_CONNECTING) {
1115 spin_lock_irqsave(&ch->spinlock, flags);
1116 list_add_tail(&ioctx->wait_list, &ch->cmd_wait_list);
1117 spin_unlock_irqrestore(&ch->spinlock, flags);
1119 srpt_reset_ioctx(ch, ioctx);
1124 dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
1125 MAX_MESSAGE_SIZE, DMA_FROM_DEVICE);
1127 ioctx->data_len = 0;
1129 ioctx->rbufs = NULL;
1131 ioctx->n_rdma_ius = 0;
1132 ioctx->rdma_ius = NULL;
1133 ioctx->scmnd = NULL;
1135 op = *(u8 *) ioctx->buf;
1138 if (srpt_handle_cmd(ch, ioctx) < 0)
1143 if (srpt_handle_tsk_mgmt(ch, ioctx) < 0)
1150 srpt_build_cmd_rsp(ch, ioctx, ILLEGAL_REQUEST, INVALID_CDB,
1151 ((struct srp_cmd *)ioctx->buf)->tag);
1156 dma_sync_single_for_device(ch->sport->sdev->device->dma_device,
1157 ioctx->dma, MAX_MESSAGE_SIZE,
1163 if (ch->state != RDMA_CHANNEL_LIVE ||
1164 srpt_post_send(ch, ioctx,
1165 sizeof(struct srp_rsp) +
1166 be32_to_cpu(((struct srp_rsp *)ioctx->buf)->
1168 srpt_reset_ioctx(ch, ioctx);
1172 * Returns true if the ioctx list is non-empty or if the ib_srpt kernel thread has been asked to stop.
1176 static inline int srpt_test_ioctx_list(void)
1178 int res = (!list_empty(&srpt_thread.thread_ioctx_list) ||
1179 unlikely(kthread_should_stop()));
1184 * Add 'ioctx' to the tail of the ioctx list and wake up the kernel thread.
1188 static inline void srpt_schedule_thread(struct srpt_ioctx *ioctx)
1190 unsigned long flags;
1192 spin_lock_irqsave(&srpt_thread.thread_lock, flags);
1193 list_add_tail(&ioctx->comp_list, &srpt_thread.thread_ioctx_list);
1194 spin_unlock_irqrestore(&srpt_thread.thread_lock, flags);
1195 wake_up(&ioctx_list_waitQ);
1199 * InfiniBand completion queue callback: polls the CQ and processes the
1200 * received work completions.
1202 static void srpt_completion(struct ib_cq *cq, void *ctx)
1204 struct srpt_rdma_ch *ch = ctx;
1205 struct srpt_device *sdev = ch->sport->sdev;
1207 struct srpt_ioctx *ioctx;
1209 ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
1210 while (ib_poll_cq(ch->cq, 1, &wc) > 0) {
1212 printk(KERN_ERR PFX "failed %s status= %d\n",
1213 wc.wr_id & SRPT_OP_RECV ? "receive" : "send",
1215 srpt_handle_err_comp(ch, &wc);
1219 if (wc.wr_id & SRPT_OP_RECV) {
1220 ioctx = sdev->ioctx_ring[wc.wr_id & ~SRPT_OP_RECV];
1223 ioctx->op = IB_WC_RECV;
1224 srpt_schedule_thread(ioctx);
1226 srpt_handle_new_iu(ch, ioctx);
1229 ioctx = sdev->ioctx_ring[wc.wr_id];
1233 ioctx->op = wc.opcode;
1234 srpt_schedule_thread(ioctx);
1236 switch (wc.opcode) {
1238 srpt_handle_send_comp(ch, ioctx,
1239 scst_estimate_context());
1241 case IB_WC_RDMA_WRITE:
1242 case IB_WC_RDMA_READ:
1243 srpt_handle_rdma_comp(ch, ioctx);
1253 * Create a completion queue and a queue pair for the specified RDMA channel.
1255 static int srpt_create_ch_ib(struct srpt_rdma_ch *ch)
1257 struct ib_qp_init_attr *qp_init;
1258 struct srpt_device *sdev = ch->sport->sdev;
1262 qp_init = kzalloc(sizeof *qp_init, GFP_KERNEL);
1266 /* Create a completion queue (CQ). */
1268 cqe = SRPT_RQ_SIZE + SRPT_SQ_SIZE - 1;
1269 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(RHEL_RELEASE_CODE)
1270 ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe);
1272 ch->cq = ib_create_cq(sdev->device, srpt_completion, NULL, ch, cqe, 0);
1274 if (IS_ERR(ch->cq)) {
1275 ret = PTR_ERR(ch->cq);
1276 printk(KERN_ERR PFX "failed to create_cq cqe= %d ret= %d\n",
1281 /* Request completion notification. */
1283 ib_req_notify_cq(ch->cq, IB_CQ_NEXT_COMP);
1285 /* Create a queue pair (QP). */
1287 qp_init->qp_context = (void *)ch;
1288 qp_init->event_handler = srpt_qp_event;
1289 qp_init->send_cq = ch->cq;
1290 qp_init->recv_cq = ch->cq;
1291 qp_init->srq = sdev->srq;
1292 qp_init->sq_sig_type = IB_SIGNAL_REQ_WR;
1293 qp_init->qp_type = IB_QPT_RC;
1294 qp_init->cap.max_send_wr = SRPT_SQ_SIZE;
1295 qp_init->cap.max_send_sge = SRPT_DEF_SG_PER_WQE;
1297 ch->qp = ib_create_qp(sdev->pd, qp_init);
1298 if (IS_ERR(ch->qp)) {
1299 ret = PTR_ERR(ch->qp);
1300 ib_destroy_cq(ch->cq);
1301 printk(KERN_ERR PFX "failed to create_qp ret= %d\n", ret);
1305 printk(KERN_DEBUG PFX "%s: max_cqe= %d max_sge= %d cm_id= %p\n",
1306 __func__, ch->cq->cqe, qp_init->cap.max_send_sge,
1309 /* Modify the attributes and the state of queue pair ch->qp. */
1311 ret = srpt_init_ch_qp(ch, ch->qp);
1313 ib_destroy_qp(ch->qp);
1314 ib_destroy_cq(ch->cq);
1318 atomic_set(&ch->req_lim_delta, SRPT_RQ_SIZE);
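/* Look up the RDMA channel that has been associated with CM id 'cm_id'. */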
1324 static struct srpt_rdma_ch *srpt_find_channel(struct ib_cm_id *cm_id)
1326 struct srpt_device *sdev = cm_id->context;
1327 struct srpt_rdma_ch *ch, *tmp_ch;
1329 spin_lock_irq(&sdev->spinlock);
1330 list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
1331 if (ch->cm_id == cm_id) {
1332 spin_unlock_irq(&sdev->spinlock);
1337 spin_unlock_irq(&sdev->spinlock);
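/*
 * Release an RDMA channel: remove it from the per-device channel list,
 * destroy its QP and CQ, unregister its SCST session and, if 'destroy_cmid'
 * is non-zero, also destroy its CM id.
 */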
1342 static int srpt_release_channel(struct srpt_rdma_ch *ch, int destroy_cmid)
1344 spin_lock_irq(&ch->sport->sdev->spinlock);
1345 list_del(&ch->list);
1346 spin_unlock_irq(&ch->sport->sdev->spinlock);
1348 if (ch->cm_id && destroy_cmid) {
1349 printk(KERN_WARNING PFX
1350 "%s: destroy cm_id= %p\n", __func__, ch->cm_id);
1351 ib_destroy_cm_id(ch->cm_id);
1355 ib_destroy_qp(ch->qp);
1356 ib_destroy_cq(ch->cq);
1358 if (ch->scst_sess) {
1359 struct srpt_ioctx *ioctx, *ioctx_tmp;
1361 printk(KERN_WARNING PFX
1362 "%s: release sess= %p sess_name= %s active_cmd= %d\n",
1363 __func__, ch->scst_sess, ch->sess_name,
1364 ch->active_scmnd_cnt);
1366 list_for_each_entry_safe(ioctx, ioctx_tmp,
1367 &ch->active_scmnd_list, scmnd_list) {
1368 list_del(&ioctx->scmnd_list);
1369 ch->active_scmnd_cnt--;
1372 scst_unregister_session(ch->scst_sess, 0, NULL);
1373 ch->scst_sess = NULL;
1378 return destroy_cmid ? 0 : 1;
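/*
 * Change the channel state to RDMA_CHANNEL_DISCONNECTING and send either a
 * DREQ ('dreq' != 0) or a DREP message to the initiator.
 */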
1381 static int srpt_disconnect_channel(struct srpt_rdma_ch *ch, int dreq)
1383 spin_lock_irq(&ch->spinlock);
1384 ch->state = RDMA_CHANNEL_DISCONNECTING;
1385 spin_unlock_irq(&ch->spinlock);
1388 ib_send_cm_dreq(ch->cm_id, NULL, 0);
1390 ib_send_cm_drep(ch->cm_id, NULL, 0);
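/*
 * Process an IB_CM_REQ_RECEIVED event: validate the SRP login request,
 * apply the SRP multi-channel policy to any existing channel for the same
 * initiator, set up a new RDMA channel and SCST session and reply with
 * either an SRP_LOGIN_RSP (via ib_send_cm_rep()) or an SRP_LOGIN_REJ.
 */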
1395 static int srpt_cm_req_recv(struct ib_cm_id *cm_id,
1396 struct ib_cm_req_event_param *param,
1399 struct srpt_device *sdev = cm_id->context;
1400 struct srp_login_req *req;
1401 struct srp_login_rsp *rsp;
1402 struct srp_login_rej *rej;
1403 struct ib_cm_rep_param *rep_param;
1404 struct srpt_rdma_ch *ch, *tmp_ch;
1408 if (!sdev || !private_data)
1411 rsp = kzalloc(sizeof *rsp, GFP_KERNEL);
1412 rej = kzalloc(sizeof *rej, GFP_KERNEL);
1413 rep_param = kzalloc(sizeof *rep_param, GFP_KERNEL);
1415 if (!rsp || !rej || !rep_param) {
1420 req = (struct srp_login_req *)private_data;
1422 it_iu_len = be32_to_cpu(req->req_it_iu_len);
1424 printk(KERN_DEBUG PFX
1425 "Host login i_port_id=0x%llx:0x%llx t_port_id=0x%llx:0x%llx"
1427 (unsigned long long)
1428 be64_to_cpu(*(u64 *)&req->initiator_port_id[0]),
1429 (unsigned long long)
1430 be64_to_cpu(*(u64 *)&req->initiator_port_id[8]),
1431 (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[0]),
1432 (unsigned long long)be64_to_cpu(*(u64 *)&req->target_port_id[8]),
1435 if (it_iu_len > MAX_MESSAGE_SIZE || it_iu_len < 64) {
1437 cpu_to_be32(SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE);
1439 printk(KERN_WARNING PFX
1440 "Reject invalid it_iu_len=%d\n", it_iu_len);
1444 if ((req->req_flags & 0x3) == SRP_MULTICHAN_SINGLE) {
1445 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_NO_CHAN;
1447 spin_lock_irq(&sdev->spinlock);
1449 list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list) {
1450 if (!memcmp(ch->i_port_id, req->initiator_port_id, 16)
1451 && !memcmp(ch->t_port_id, req->target_port_id, 16)
1452 && param->port == ch->sport->port
1453 && param->listen_id == ch->sport->sdev->cm_id
1455 /* found an existing channel */
1456 printk(KERN_WARNING PFX
1457 "Found existing channel name= %s"
1458 " cm_id= %p state= %d\n",
1459 ch->sess_name, ch->cm_id, ch->state);
1461 spin_unlock_irq(&sdev->spinlock);
1464 SRP_LOGIN_RSP_MULTICHAN_TERMINATED;
1466 if (ch->state == RDMA_CHANNEL_LIVE)
1467 srpt_disconnect_channel(ch, 1);
1468 else if (ch->state == RDMA_CHANNEL_CONNECTING) {
1469 ib_send_cm_rej(ch->cm_id,
1470 IB_CM_REJ_NO_RESOURCES,
1472 srpt_release_channel(ch, 1);
1475 spin_lock_irq(&sdev->spinlock);
1479 spin_unlock_irq(&sdev->spinlock);
1482 rsp->rsp_flags = SRP_LOGIN_RSP_MULTICHAN_MAINTAINED;
1484 if (((u64) (*(u64 *) req->target_port_id) !=
1485 cpu_to_be64(mellanox_ioc_guid)) ||
1486 ((u64) (*(u64 *) (req->target_port_id + 8)) !=
1487 cpu_to_be64(mellanox_ioc_guid))) {
1489 cpu_to_be32(SRP_LOGIN_REJ_UNABLE_ASSOCIATE_CHANNEL);
1491 printk(KERN_WARNING PFX "Reject invalid target_port_id\n");
1495 ch = kzalloc(sizeof *ch, GFP_KERNEL);
1497 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1498 printk(KERN_WARNING PFX "Reject failed allocate rdma_ch\n");
1503 spin_lock_init(&ch->spinlock);
1504 memcpy(ch->i_port_id, req->initiator_port_id, 16);
1505 memcpy(ch->t_port_id, req->target_port_id, 16);
1506 ch->sport = &sdev->port[param->port - 1];
1508 ch->state = RDMA_CHANNEL_CONNECTING;
1509 INIT_LIST_HEAD(&ch->cmd_wait_list);
1510 INIT_LIST_HEAD(&ch->active_scmnd_list);
1512 ret = srpt_create_ch_ib(ch);
1514 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1515 printk(KERN_WARNING PFX "Reject failed to create rdma_ch\n");
1519 ret = srpt_ch_qp_rtr_rts(ch, ch->qp, IB_QPS_RTR);
1521 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1522 printk(KERN_WARNING PFX
1523 "Reject failed qp to rtr/rts ret=%d\n", ret);
1527 sprintf(ch->sess_name, "0x%016llx%016llx",
1528 (unsigned long long)be64_to_cpu(*(u64 *)ch->i_port_id),
1529 (unsigned long long)be64_to_cpu(*(u64 *)(ch->i_port_id + 8)));
1531 BUG_ON(!sdev->scst_tgt);
1532 ch->scst_sess = scst_register_session(sdev->scst_tgt, 0, ch->sess_name,
1534 if (!ch->scst_sess) {
1535 rej->reason = cpu_to_be32(SRP_LOGIN_REJ_INSUFFICIENT_RESOURCES);
1536 printk(KERN_WARNING PFX "Failed to create scst sess\n");
1540 spin_lock_irq(&sdev->spinlock);
1541 list_add_tail(&ch->list, &sdev->rch_list);
1542 spin_unlock_irq(&sdev->spinlock);
1544 printk(KERN_DEBUG PFX "Establish connection sess=%p name=%s cm_id=%p\n",
1545 ch->scst_sess, ch->sess_name, ch->cm_id);
1547 scst_sess_set_tgt_priv(ch->scst_sess, ch);
1549 /* create srp_login_response */
1550 rsp->opcode = SRP_LOGIN_RSP;
1551 rsp->tag = req->tag;
1552 rsp->max_it_iu_len = req->req_it_iu_len;
1553 rsp->max_ti_iu_len = req->req_it_iu_len;
1555 cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
1556 rsp->req_lim_delta = cpu_to_be32(SRPT_RQ_SIZE);
1557 atomic_set(&ch->req_lim_delta, 0);
1559 /* create cm reply */
1560 rep_param->qp_num = ch->qp->qp_num;
1561 rep_param->private_data = (void *)rsp;
1562 rep_param->private_data_len = sizeof *rsp;
1563 rep_param->rnr_retry_count = 7;
1564 rep_param->flow_control = 1;
1565 rep_param->failover_accepted = 0;
1567 rep_param->responder_resources = 4;
1568 rep_param->initiator_depth = 4;
1570 ret = ib_send_cm_rep(cm_id, rep_param);
1572 srpt_release_channel(ch, 0);
1577 ib_destroy_qp(ch->qp);
1578 ib_destroy_cq(ch->cq);
1584 rej->opcode = SRP_LOGIN_REJ;
1585 rej->tag = req->tag;
1587 cpu_to_be16(SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT);
1589 ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED, NULL, 0,
1590 (void *)rej, sizeof *rej);
1600 static int srpt_find_and_release_channel(struct ib_cm_id *cm_id)
1602 struct srpt_rdma_ch *ch;
1604 ch = srpt_find_channel(cm_id);
1608 return srpt_release_channel(ch, 0);
1611 static int srpt_cm_rej_recv(struct ib_cm_id *cm_id)
1613 printk(KERN_DEBUG PFX "%s: cm_id=%p\n", __func__, cm_id);
1614 return srpt_find_and_release_channel(cm_id);
1617 static int srpt_cm_rtu_recv(struct ib_cm_id *cm_id)
1619 struct srpt_rdma_ch *ch;
1622 ch = srpt_find_channel(cm_id);
1626 if (ch->state == RDMA_CHANNEL_CONNECTING) {
1627 struct srpt_ioctx *ioctx, *ioctx_tmp;
1629 spin_lock_irq(&ch->spinlock);
1630 ch->state = RDMA_CHANNEL_LIVE;
1631 spin_unlock_irq(&ch->spinlock);
1632 ret = srpt_ch_qp_rtr_rts(ch, ch->qp, IB_QPS_RTS);
1634 list_for_each_entry_safe(ioctx, ioctx_tmp, &ch->cmd_wait_list,
1636 list_del(&ioctx->wait_list);
1637 srpt_handle_new_iu(ch, ioctx);
1639 } else if (ch->state == RDMA_CHANNEL_DISCONNECTING)
1645 printk(KERN_ERR PFX "cm_id=%p sess_name=%s state=%d\n",
1646 cm_id, ch->sess_name, ch->state);
1647 srpt_disconnect_channel(ch, 1);
1653 static int srpt_cm_timewait_exit(struct ib_cm_id *cm_id)
1655 printk(KERN_DEBUG PFX "%s: cm_id=%p\n", __func__, cm_id);
1656 return srpt_find_and_release_channel(cm_id);
1659 static int srpt_cm_rep_error(struct ib_cm_id *cm_id)
1661 printk(KERN_DEBUG PFX "%s: cm_id=%p\n", __func__, cm_id);
1662 return srpt_find_and_release_channel(cm_id);
1665 static int srpt_cm_dreq_recv(struct ib_cm_id *cm_id)
1667 struct srpt_rdma_ch *ch;
1670 ch = srpt_find_channel(cm_id);
1675 printk(KERN_DEBUG PFX "%s: cm_id= %p ch->state= %d\n",
1676 __func__, cm_id, ch->state);
1678 switch (ch->state) {
1679 case RDMA_CHANNEL_LIVE:
1680 case RDMA_CHANNEL_CONNECTING:
1681 ret = srpt_disconnect_channel(ch, 0);
1683 case RDMA_CHANNEL_DISCONNECTING:
1691 static int srpt_cm_drep_recv(struct ib_cm_id *cm_id)
1693 printk(KERN_DEBUG PFX "%s: cm_id=%p\n", __func__, cm_id);
1694 return srpt_find_and_release_channel(cm_id);
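/*
 * InfiniBand connection manager callback: dispatch CM events (REQ, REJ,
 * RTU, DREQ, DREP, TIMEWAIT_EXIT and REP_ERROR) to the handlers above.
 */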
1697 static int srpt_cm_handler(struct ib_cm_id *cm_id, struct ib_cm_event *event)
1701 switch (event->event) {
1702 case IB_CM_REQ_RECEIVED:
1703 ret = srpt_cm_req_recv(cm_id, &event->param.req_rcvd,
1704 event->private_data);
1706 case IB_CM_REJ_RECEIVED:
1707 ret = srpt_cm_rej_recv(cm_id);
1709 case IB_CM_RTU_RECEIVED:
1710 case IB_CM_USER_ESTABLISHED:
1711 ret = srpt_cm_rtu_recv(cm_id);
1713 case IB_CM_DREQ_RECEIVED:
1714 ret = srpt_cm_dreq_recv(cm_id);
1716 case IB_CM_DREP_RECEIVED:
1717 ret = srpt_cm_drep_recv(cm_id);
1719 case IB_CM_TIMEWAIT_EXIT:
1720 ret = srpt_cm_timewait_exit(cm_id);
1722 case IB_CM_REP_ERROR:
1723 ret = srpt_cm_rep_error(cm_id);
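/*
 * Map the scatterlist of SCST command 'scmnd' for DMA and translate the
 * remote buffer descriptors into the rdma_iu / ib_sge entries that are
 * later used by srpt_perform_rdmas() to post RDMA work requests.
 */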
1732 static int srpt_map_sg_to_ib_sge(struct srpt_rdma_ch *ch,
1733 struct srpt_ioctx *ioctx,
1734 struct scst_cmd *scmnd)
1736 struct scatterlist *scat;
1737 scst_data_direction dir;
1738 struct rdma_iu *riu;
1739 struct srp_direct_buf *db;
1740 dma_addr_t dma_addr;
1749 scat = scst_cmd_get_sg(scmnd);
1750 dir = scst_cmd_get_data_direction(scmnd);
1751 count = dma_map_sg(ch->sport->sdev->device->dma_device, scat,
1752 scst_cmd_get_sg_cnt(scmnd),
1753 scst_to_tgt_dma_dir(dir));
1754 if (unlikely(!count))
1757 if (ioctx->rdma_ius && ioctx->n_rdma_ius)
1758 nrdma = ioctx->n_rdma_ius;
1760 nrdma = count / SRPT_DEF_SG_PER_WQE + ioctx->n_rbuf;
1762 ioctx->rdma_ius = kzalloc(nrdma * sizeof *riu,
1763 scst_cmd_atomic(scmnd)
1764 ? GFP_ATOMIC : GFP_KERNEL);
1765 if (!ioctx->rdma_ius) {
1766 dma_unmap_sg(ch->sport->sdev->device->dma_device,
1767 scat, scst_cmd_get_sg_cnt(scmnd),
1768 scst_to_tgt_dma_dir(dir));
1772 ioctx->n_rdma_ius = nrdma;
1776 tsize = (dir == SCST_DATA_READ) ?
1777 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
1778 dma_len = sg_dma_len(&scat[0]);
1779 riu = ioctx->rdma_ius;
1782 * For each remote descriptor, calculate the number of ib_sge entries needed.
1783 * If at most SRPT_DEF_SG_PER_WQE ib_sge entries are needed per RDMA operation,
1784 * a single rdma_iu (one RDMA work request) per remote descriptor suffices;
1786 * otherwise extra rdma_iu entries are allocated to carry the extra ib_sge entries.
1790 j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
1791 rsize = be32_to_cpu(db->len);
1792 raddr = be64_to_cpu(db->va);
1794 riu->rkey = be32_to_cpu(db->key);
1797 /* calculate how many sge required for this remote_buf */
1798 while (rsize > 0 && tsize > 0) {
1800 if (rsize >= dma_len) {
1808 dma_len = sg_dma_len(&scat[j]);
1818 if (rsize > 0 && riu->sge_cnt == SRPT_DEF_SG_PER_WQE) {
1820 kmalloc(riu->sge_cnt * sizeof *riu->sge,
1821 scst_cmd_atomic(scmnd)
1822 ? GFP_ATOMIC : GFP_KERNEL);
1830 riu->rkey = be32_to_cpu(db->key);
1834 riu->sge = kmalloc(riu->sge_cnt * sizeof *riu->sge,
1835 scst_cmd_atomic(scmnd)
1836 ? GFP_ATOMIC : GFP_KERNEL);
1845 scat = scst_cmd_get_sg(scmnd);
1846 tsize = (dir == SCST_DATA_READ) ?
1847 scst_cmd_get_resp_data_len(scmnd) : scst_cmd_get_bufflen(scmnd);
1848 riu = ioctx->rdma_ius;
1849 dma_len = sg_dma_len(&scat[0]);
1850 dma_addr = sg_dma_address(&scat[0]);
1852 /* This second loop maps the scatterlist addresses onto rdma_iu->ib_sge entries. */
1854 j < count && i < ioctx->n_rbuf && tsize > 0; ++i, ++riu, ++db) {
1855 rsize = be32_to_cpu(db->len);
1859 while (rsize > 0 && tsize > 0) {
1860 sge->addr = dma_addr;
1861 sge->lkey = ch->sport->sdev->mr->lkey;
1863 if (rsize >= dma_len) {
1865 (tsize < dma_len) ? tsize : dma_len;
1872 dma_len = sg_dma_len(&scat[j]);
1874 sg_dma_address(&scat[j]);
1878 sge->length = (tsize < rsize) ? tsize : rsize;
1886 if (k == riu->sge_cnt && rsize > 0) {
1890 } else if (rsize > 0)
1898 while (ioctx->n_rdma)
1899 kfree(ioctx->rdma_ius[ioctx->n_rdma--].sge);
1901 kfree(ioctx->rdma_ius);
1903 dma_unmap_sg(ch->sport->sdev->device->dma_device,
1904 scat, scst_cmd_get_sg_cnt(scmnd),
1905 scst_to_tgt_dma_dir(dir));
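/*
 * Post one RDMA work request per rdma_iu on the queue pair of channel 'ch'.
 * A signaled completion is requested only for the last work request of a
 * write (data-out) transfer.
 */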
1910 static int srpt_perform_rdmas(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
1911 scst_data_direction dir)
1913 struct ib_send_wr wr;
1914 struct ib_send_wr *bad_wr;
1915 struct rdma_iu *riu;
1919 riu = ioctx->rdma_ius;
1920 memset(&wr, 0, sizeof wr);
1922 for (i = 0; i < ioctx->n_rdma; ++i, ++riu) {
1923 wr.opcode = (dir == SCST_DATA_READ) ?
1924 IB_WR_RDMA_WRITE : IB_WR_RDMA_READ;
1926 wr.wr_id = ioctx->index;
1927 wr.wr.rdma.remote_addr = riu->raddr;
1928 wr.wr.rdma.rkey = riu->rkey;
1929 wr.num_sge = riu->sge_cnt;
1930 wr.sg_list = riu->sge;
1932 /* only get completion event for the last rdma wr */
1933 if (i == (ioctx->n_rdma - 1) && dir == SCST_DATA_WRITE)
1934 wr.send_flags = IB_SEND_SIGNALED;
1936 ret = ib_post_send(ch->qp, &wr, &bad_wr);
1945 * Start data transfer between initiator and target. Must not block.
1947 static int srpt_xfer_data(struct srpt_rdma_ch *ch, struct srpt_ioctx *ioctx,
1948 struct scst_cmd *scmnd)
1952 ret = srpt_map_sg_to_ib_sge(ch, ioctx, scmnd);
1954 printk(KERN_ERR PFX "%s[%d] ret=%d\n", __func__, __LINE__, ret);
1955 ret = SCST_TGT_RES_QUEUE_FULL;
1959 ret = srpt_perform_rdmas(ch, ioctx, scst_cmd_get_data_direction(scmnd));
1961 printk(KERN_ERR PFX "%s[%d] ret=%d\n", __func__, __LINE__, ret);
1962 if (ret == -EAGAIN || ret == -ENOMEM)
1963 ret = SCST_TGT_RES_QUEUE_FULL;
1965 ret = SCST_TGT_RES_FATAL_ERROR;
1969 ret = SCST_TGT_RES_SUCCESS;
1976 * Called by the SCST core to inform ib_srpt that data reception should start.
1979 static int srpt_rdy_to_xfer(struct scst_cmd *scmnd)
1981 struct srpt_rdma_ch *ch;
1982 struct srpt_ioctx *ioctx;
1984 ioctx = scst_cmd_get_tgt_priv(scmnd);
1987 ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
1990 if (ch->state == RDMA_CHANNEL_DISCONNECTING)
1991 return SCST_TGT_RES_FATAL_ERROR;
1992 else if (ch->state == RDMA_CHANNEL_CONNECTING)
1993 return SCST_TGT_RES_QUEUE_FULL;
1995 return srpt_xfer_data(ch, ioctx, scmnd);
1999 * Called by the SCST core. Transmits the response buffer and status held in
2000 * 'scmnd'. Must not block.
2002 static int srpt_xmit_response(struct scst_cmd *scmnd)
2004 struct srpt_rdma_ch *ch;
2005 struct srpt_ioctx *ioctx;
2006 struct srp_rsp *srp_rsp;
2008 int ret = SCST_TGT_RES_SUCCESS;
2012 ioctx = scst_cmd_get_tgt_priv(scmnd);
2015 ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2018 tag = scst_cmd_get_tag(scmnd);
2020 if (ch->state != RDMA_CHANNEL_LIVE) {
2022 "%s: tag= %lld channel in bad state %d\n",
2023 __func__, (unsigned long long)tag, ch->state);
2025 if (ch->state == RDMA_CHANNEL_DISCONNECTING)
2026 ret = SCST_TGT_RES_FATAL_ERROR;
2027 else if (ch->state == RDMA_CHANNEL_CONNECTING)
2028 ret = SCST_TGT_RES_QUEUE_FULL;
2030 if (unlikely(scst_cmd_aborted(scmnd)))
2036 dma_sync_single_for_cpu(ch->sport->sdev->device->dma_device, ioctx->dma,
2037 MAX_MESSAGE_SIZE, DMA_TO_DEVICE);
2039 srp_rsp = ioctx->buf;
2041 if (unlikely(scst_cmd_aborted(scmnd))) {
2043 "%s: tag= %lld has already been aborted\n",
2044 __func__, (unsigned long long)tag);
2048 dir = scst_cmd_get_data_direction(scmnd);
2049 status = scst_cmd_get_status(scmnd) & 0xff;
2051 srpt_build_cmd_rsp(ch, ioctx, NO_SENSE, NO_ADD_SENSE, tag);
2053 if (SCST_SENSE_VALID(scst_cmd_get_sense_buffer(scmnd))) {
2054 srp_rsp->sense_data_len = scst_cmd_get_sense_buffer_len(scmnd);
2055 if (srp_rsp->sense_data_len >
2056 (MAX_MESSAGE_SIZE - sizeof *srp_rsp))
2057 srp_rsp->sense_data_len =
2058 MAX_MESSAGE_SIZE - sizeof *srp_rsp;
2060 memcpy((u8 *) (srp_rsp + 1), scst_cmd_get_sense_buffer(scmnd),
2061 srp_rsp->sense_data_len);
2063 srp_rsp->sense_data_len = cpu_to_be32(srp_rsp->sense_data_len);
2064 srp_rsp->flags |= SRP_RSP_FLAG_SNSVALID;
2067 status = SAM_STAT_CHECK_CONDITION;
2070 srp_rsp->status = status;
2072 /* transfer read data if any */
2073 if (dir == SCST_DATA_READ && scst_cmd_get_resp_data_len(scmnd)) {
2074 ret = srpt_xfer_data(ch, ioctx, scmnd);
2075 if (ret != SCST_TGT_RES_SUCCESS) {
2077 "%s: tag= %lld xfer_data failed\n",
2078 __func__, (unsigned long long)tag);
2083 if (srpt_post_send(ch, ioctx,
2085 be32_to_cpu(srp_rsp->sense_data_len))) {
2086 printk(KERN_ERR PFX "%s: ch->state= %d tag= %lld\n",
2087 __func__, ch->state,
2088 (unsigned long long)tag);
2089 ret = SCST_TGT_RES_FATAL_ERROR;
2096 ret = SCST_TGT_RES_SUCCESS;
2097 scst_set_delivery_status(scmnd, SCST_CMD_DELIVERY_ABORTED);
2098 scst_tgt_cmd_done(scmnd, SCST_CONTEXT_SAME);
2103 * Called by the SCST core to inform ib_srpt that a received task management
2104 * function has been completed. Must not block.
2106 static void srpt_tsk_mgmt_done(struct scst_mgmt_cmd *mcmnd)
2108 struct srpt_rdma_ch *ch;
2109 struct srpt_mgmt_ioctx *mgmt_ioctx;
2110 struct srpt_ioctx *ioctx;
2112 mgmt_ioctx = scst_mgmt_cmd_get_tgt_priv(mcmnd);
2113 BUG_ON(!mgmt_ioctx);
2115 ch = mgmt_ioctx->ch;
2118 ioctx = mgmt_ioctx->ioctx;
2121 printk(KERN_WARNING PFX
2122 "%s: tsk_mgmt_done for tag= %lld status=%d\n",
2123 __func__, (unsigned long long)mgmt_ioctx->tag,
2124 scst_mgmt_cmd_get_status(mcmnd));
2126 srpt_build_tskmgmt_rsp(ch, ioctx,
2127 (scst_mgmt_cmd_get_status(mcmnd) ==
2128 SCST_MGMT_STATUS_SUCCESS) ?
2129 SRP_TSK_MGMT_SUCCESS : SRP_TSK_MGMT_FAILED,
2131 srpt_post_send(ch, ioctx, sizeof(struct srp_rsp) + 4);
2133 scst_mgmt_cmd_set_tgt_priv(mcmnd, NULL);
2139 * Called by the SCST core to inform ib_srpt that the command 'scmnd' is about
2140 * to be freed. May be called in IRQ context.
2142 static void srpt_on_free_cmd(struct scst_cmd *scmnd)
2144 struct srpt_rdma_ch *ch;
2145 struct srpt_ioctx *ioctx;
2147 ioctx = scst_cmd_get_tgt_priv(scmnd);
2150 ch = scst_sess_get_tgt_priv(scst_cmd_get_session(scmnd));
2153 spin_lock_irq(&ch->spinlock);
2154 list_del(&ioctx->scmnd_list);
2155 ch->active_scmnd_cnt--;
2156 spin_unlock_irq(&ch->spinlock);
2158 srpt_reset_ioctx(ch, ioctx);
2159 scst_cmd_set_tgt_priv(scmnd, NULL);
2162 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2163 /* A vanilla 2.6.19 or older kernel without backported OFED kernel headers. */
2164 static void srpt_refresh_port_work(void *ctx)
2166 static void srpt_refresh_port_work(struct work_struct *work)
2169 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2170 struct srpt_port *sport = (struct srpt_port *)ctx;
2172 struct srpt_port *sport = container_of(work, struct srpt_port, work);
2175 srpt_refresh_port(sport);
2179 * Called by the SCST core to detect target adapters. Returns the number of
2180 * detected target adapters.
2182 static int srpt_detect(struct scst_tgt_template *tp)
2184 struct srpt_device *sdev;
2189 list_for_each_entry(sdev, &srpt_devices, list)
2198 * Callback function called by the SCST core from scst_unregister() to free up
2199 * the resources associated with device scst_tgt.
2201 static int srpt_release(struct scst_tgt *scst_tgt)
2203 struct srpt_device *sdev = scst_tgt_get_tgt_priv(scst_tgt);
2204 struct srpt_rdma_ch *ch, *tmp_ch;
2209 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
2218 list_for_each_entry_safe(ch, tmp_ch, &sdev->rch_list, list)
2219 srpt_release_channel(ch, 1);
2221 srpt_unregister_mad_agent(sdev);
2223 scst_tgt_set_tgt_priv(scst_tgt, NULL);
2231 * Entry point for ib_srpt's kernel thread. This kernel thread is only created
2232 * when the module parameter 'thread' is not zero (the default is zero).
2233 * This thread processes the ioctx list srpt_thread.thread_ioctx_list.
2237 static int srpt_ioctx_thread(void *arg)
2239 struct srpt_ioctx *ioctx;
2241 /* Hibernation / freezing of the SRPT kernel thread is not supported. */
2242 current->flags |= PF_NOFREEZE;
2244 spin_lock_irq(&srpt_thread.thread_lock);
2245 while (!kthread_should_stop()) {
2247 init_waitqueue_entry(&wait, current);
2249 if (!srpt_test_ioctx_list()) {
2250 add_wait_queue_exclusive(&ioctx_list_waitQ, &wait);
2253 set_current_state(TASK_INTERRUPTIBLE);
2254 if (srpt_test_ioctx_list())
2256 spin_unlock_irq(&srpt_thread.thread_lock);
2258 spin_lock_irq(&srpt_thread.thread_lock);
2260 set_current_state(TASK_RUNNING);
2261 remove_wait_queue(&ioctx_list_waitQ, &wait);
2264 while (!list_empty(&srpt_thread.thread_ioctx_list)) {
2265 ioctx = list_entry(srpt_thread.thread_ioctx_list.next,
2266 struct srpt_ioctx, comp_list);
2268 list_del(&ioctx->comp_list);
2270 spin_unlock_irq(&srpt_thread.thread_lock);
2271 switch (ioctx->op) {
2273 srpt_handle_send_comp(ioctx->ch, ioctx,
2274 SCST_CONTEXT_DIRECT);
2276 case IB_WC_RDMA_WRITE:
2277 case IB_WC_RDMA_READ:
2278 srpt_handle_rdma_comp(ioctx->ch, ioctx);
2281 srpt_handle_new_iu(ioctx->ch, ioctx);
2286 spin_lock_irq(&srpt_thread.thread_lock);
2289 spin_unlock_irq(&srpt_thread.thread_lock);
2294 /* SCST target template for the SRP target implementation. */
2295 static struct scst_tgt_template srpt_template = {
2297 .sg_tablesize = SRPT_DEF_SG_TABLESIZE,
2298 .xmit_response_atomic = 1,
2299 .rdy_to_xfer_atomic = 1,
2301 .detect = srpt_detect,
2302 .release = srpt_release,
2303 .xmit_response = srpt_xmit_response,
2304 .rdy_to_xfer = srpt_rdy_to_xfer,
2305 .on_free_cmd = srpt_on_free_cmd,
2306 .task_mgmt_fn_done = srpt_tsk_mgmt_done
2310 * The callback function srpt_release_class_dev() is called whenever a
2311 * device is removed from the /sys/class/infiniband_srpt device class.
2313 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2314 static void srpt_release_class_dev(struct class_device *class_dev)
2316 static void srpt_release_class_dev(struct device *dev)
2321 static const struct { int flag; const char *const label; }
2322 srpt_trace_label[] =
2324 { TRACE_OUT_OF_MEM, "out_of_mem" },
2325 { TRACE_MINOR, "minor" },
2326 { TRACE_SG_OP, "sg" },
2327 { TRACE_MEMORY, "mem" },
2328 { TRACE_BUFF, "buff" },
2329 { TRACE_ENTRYEXIT, "entryexit" },
2330 { TRACE_PID, "pid" },
2331 { TRACE_LINE, "line" },
2332 { TRACE_FUNCTION, "function" },
2333 { TRACE_DEBUG, "debug" },
2334 { TRACE_SPECIAL, "special" },
2335 { TRACE_SCSI, "scsi" },
2336 { TRACE_MGMT, "mgmt" },
2337 { TRACE_MGMT_MINOR, "mgmt_minor" },
2338 { TRACE_MGMT_DEBUG, "mgmt_dbg" },
2342 * Convert a label into a trace flag. Consider exactly 'len' characters of
2343 * the label and ignore case. Return zero if no match has been found.
2345 static unsigned long trace_label_to_flag(const char *const label, int len)
2349 for (i = 0; i < ARRAY_SIZE(srpt_trace_label); i++)
2350 if (strncasecmp(srpt_trace_label[i].label, label, len) == 0)
2351 return srpt_trace_label[i].flag;
2357 * Parse multiple tracing flags separated by whitespace. Return zero upon failure.
2360 static unsigned long parse_flags(const char *buf, int count)
2362 unsigned long result = 0;
2367 for (p = buf; p < buf + count; p = e) {
2368 while (p < buf + count && isspace(*p))
2371 while (e < buf + count && !isspace(*e))
2375 flag = trace_label_to_flag(p, e - p);
2384 * Convert a flag into a label. A flag is an integer with exactly one bit set.
2385 * Return NULL upon failure.
2387 static const char *trace_flag_to_label(unsigned long flag)
2394 for (i = 0; i < ARRAY_SIZE(srpt_trace_label); i++)
2395 if (srpt_trace_label[i].flag == flag)
2396 return srpt_trace_label[i].label;
2401 /** sysfs function for showing the "trace_level" attribute. */
2402 static ssize_t srpt_show_trace_flags(struct class *class, char *buf)
2407 if (trace_flag == 0) {
2408 strcpy(buf, "none\n");
2413 for (i = 0; i < 8 * sizeof(trace_flag); i++) {
2416 label = trace_flag_to_label(trace_flag & (1UL << i));
2429 /** sysfs function for storing the "trace_level" attribute. */
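/*
 * Example usage (the exact sysfs path depends on where sysfs is mounted;
 * with the default mount it is typically /sys/class/infiniband_srpt):
 *   echo "set out_of_mem mgmt special" > /sys/class/infiniband_srpt/trace_level
 *   echo "add debug" > /sys/class/infiniband_srpt/trace_level
 *   echo "del debug" > /sys/class/infiniband_srpt/trace_level
 *   echo "none" > /sys/class/infiniband_srpt/trace_level
 */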
2430 static ssize_t srpt_store_trace_flags(struct class *class,
2431 const char *buf, size_t count)
2433 unsigned long flags;
2435 if (strncasecmp(buf, "all", 3) == 0)
2436 trace_flag = TRACE_ALL;
2437 else if (strncasecmp(buf, "none", 4) == 0
2438 || strncasecmp(buf, "null", 4) == 0) {
2440 } else if (strncasecmp(buf, "default", 7) == 0)
2441 trace_flag = DEFAULT_SRPT_TRACE_FLAGS;
2442 else if (strncasecmp(buf, "set ", 4) == 0) {
2443 flags = parse_flags(buf + 4, count - 4);
2448 } else if (strncasecmp(buf, "add ", 4) == 0) {
2449 flags = parse_flags(buf + 4, count - 4);
2451 trace_flag |= flags;
2454 } else if (strncasecmp(buf, "del ", 4) == 0) {
2455 flags = parse_flags(buf + 4, count - 4);
2457 trace_flag &= ~flags;
2460 } else if (strncasecmp(buf, "value ", 6) == 0)
2461 trace_flag = simple_strtoul(buf + 6, NULL, 0);
2467 static struct class_attribute srpt_class_attrs[] = {
2468 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
2469 __ATTR(trace_level, 0600, srpt_show_trace_flags,
2470 srpt_store_trace_flags),
2475 static struct class srpt_class = {
2476 .name = "infiniband_srpt",
2477 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2478 .release = srpt_release_class_dev,
2480 .dev_release = srpt_release_class_dev,
2482 .class_attrs = srpt_class_attrs,
2485 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2486 static ssize_t show_login_info(struct class_device *class_dev, char *buf)
2488 static ssize_t show_login_info(struct device *dev,
2489 struct device_attribute *attr, char *buf)
2492 struct srpt_device *sdev =
2493 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2494 container_of(class_dev, struct srpt_device, class_dev);
2496 container_of(dev, struct srpt_device, dev);
2498 struct srpt_port *sport;
2502 for (i = 0; i < sdev->device->phys_port_cnt; i++) {
2503 sport = &sdev->port[i];
2505 len += sprintf(buf + len,
2506 "tid_ext=%016llx,ioc_guid=%016llx,pkey=ffff,"
2507 "dgid=%04x%04x%04x%04x%04x%04x%04x%04x,"
2508 "service_id=%016llx\n",
2509 (unsigned long long) mellanox_ioc_guid,
2510 (unsigned long long) mellanox_ioc_guid,
2511 be16_to_cpu(((__be16 *) sport->gid.raw)[0]),
2512 be16_to_cpu(((__be16 *) sport->gid.raw)[1]),
2513 be16_to_cpu(((__be16 *) sport->gid.raw)[2]),
2514 be16_to_cpu(((__be16 *) sport->gid.raw)[3]),
2515 be16_to_cpu(((__be16 *) sport->gid.raw)[4]),
2516 be16_to_cpu(((__be16 *) sport->gid.raw)[5]),
2517 be16_to_cpu(((__be16 *) sport->gid.raw)[6]),
2518 be16_to_cpu(((__be16 *) sport->gid.raw)[7]),
2519 (unsigned long long) mellanox_ioc_guid);
2525 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2526 static CLASS_DEVICE_ATTR(login_info, S_IRUGO, show_login_info, NULL);
2528 static DEVICE_ATTR(login_info, S_IRUGO, show_login_info, NULL);
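/*
 * Example of the login_info string produced by show_login_info() above, one
 * line per HCA port (the hexadecimal values are made up for illustration; the
 * real output contains the IOC GUID and the port GID of the local HCA):
 *
 *   tid_ext=0002c90300a1b2c3,ioc_guid=0002c90300a1b2c3,pkey=ffff,
 *   dgid=fe800000000000000002c90300a1b2c4,service_id=0002c90300a1b2c3
 *
 * (wrapped here for readability; the attribute prints each port on a single
 * line)
 */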
2532 * Callback function invoked by the InfiniBand core either when an InfiniBand
2533 * device is added or, from ib_register_client(), once for each InfiniBand
2534 * device that was already registered.
2536 static void srpt_add_one(struct ib_device *device)
2538 struct srpt_device *sdev;
2539 struct srpt_port *sport;
2540 struct ib_srq_init_attr srq_attr;
2545 sdev = kzalloc(sizeof *sdev, GFP_KERNEL);
2549 sdev->device = device;
2551 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2552 sdev->class_dev.class = &srpt_class;
2553 sdev->class_dev.dev = device->dma_device;
2554 snprintf(sdev->class_dev.class_id, BUS_ID_SIZE,
2555 "srpt-%s", device->name);
2557 sdev->dev.class = &srpt_class;
2558 sdev->dev.parent = device->dma_device;
2559 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 30)
2560 snprintf(sdev->dev.bus_id, BUS_ID_SIZE, "srpt-%s", device->name);
2562 snprintf(sdev->init_name, sizeof(sdev->init_name),
2563 "srpt-%s", device->name);
2564 sdev->dev.init_name = sdev->init_name;
2568 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2569 if (class_device_register(&sdev->class_dev))
2571 if (class_device_create_file(&sdev->class_dev,
2572 &class_device_attr_login_info))
2575 if (device_register(&sdev->dev))
2577 if (device_create_file(&sdev->dev, &dev_attr_login_info))
2581 if (ib_query_device(device, &sdev->dev_attr))
2584 sdev->pd = ib_alloc_pd(device);
2585 if (IS_ERR(sdev->pd))
2588 sdev->mr = ib_get_dma_mr(sdev->pd, IB_ACCESS_LOCAL_WRITE);
2589 if (IS_ERR(sdev->mr))
2592 srq_attr.event_handler = srpt_srq_event;
2593 srq_attr.srq_context = (void *)sdev;
2594 srq_attr.attr.max_wr = min(SRPT_SRQ_SIZE, sdev->dev_attr.max_srq_wr);
2595 srq_attr.attr.max_sge = 1;
2596 srq_attr.attr.srq_limit = 0;
2598 sdev->srq = ib_create_srq(sdev->pd, &srq_attr);
2599 if (IS_ERR(sdev->srq))
2602 printk(KERN_DEBUG PFX "%s: create SRQ #wr=%d max_allow=%d dev=%s\n",
2603 __func__, srq_attr.attr.max_wr,
2604 sdev->dev_attr.max_srq_wr, device->name);
2606 if (!mellanox_ioc_guid)
2607 mellanox_ioc_guid = be64_to_cpu(device->node_guid);
2609 sdev->cm_id = ib_create_cm_id(device, srpt_cm_handler, sdev);
2610 if (IS_ERR(sdev->cm_id))
2613 /* print out target login information */
2614 printk(KERN_DEBUG PFX "Target login info: id_ext=%016llx,"
2615 "ioc_guid=%016llx,pkey=ffff,service_id=%016llx\n",
2616 (unsigned long long) mellanox_ioc_guid,
2617 (unsigned long long) mellanox_ioc_guid,
2618 (unsigned long long) mellanox_ioc_guid);
2621 * We do not have a consistent service_id (i.e. the id_ext part of the
2622 * target_id) with which to identify this target. We currently use the GUID
2623 * of the first HCA in the system as the service_id; therefore, the target_id
2624 * will change if this HCA fails and is replaced by a different one.
2626 if (ib_cm_listen(sdev->cm_id, cpu_to_be64(mellanox_ioc_guid), 0, NULL))
2629 INIT_IB_EVENT_HANDLER(&sdev->event_handler, sdev->device,
2630 srpt_event_handler);
2631 if (ib_register_event_handler(&sdev->event_handler))
2634 if (srpt_alloc_ioctx_ring(sdev))
2637 INIT_LIST_HEAD(&sdev->rch_list);
2638 spin_lock_init(&sdev->spinlock);
2640 for (i = 0; i < SRPT_SRQ_SIZE; ++i)
2641 srpt_post_recv(sdev, sdev->ioctx_ring[i]);
2643 list_add_tail(&sdev->list, &srpt_devices);
2645 ib_set_client_data(device, &srpt_client, sdev);
2647 sdev->scst_tgt = scst_register(&srpt_template, NULL);
2648 if (!sdev->scst_tgt) {
2649 printk(KERN_ERR PFX "SCST registration failed for %s.\n",
2650 sdev->device->name);
2654 scst_tgt_set_tgt_priv(sdev->scst_tgt, sdev);
2656 for (i = 1; i <= sdev->device->phys_port_cnt; i++) {
2657 sport = &sdev->port[i - 1];
2660 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 20) && ! defined(BACKPORT_LINUX_WORKQUEUE_TO_2_6_19)
2662 * A vanilla 2.6.19 or older kernel without backported OFED kernel headers.
2665 INIT_WORK(&sport->work, srpt_refresh_port_work, sport);
2667 INIT_WORK(&sport->work, srpt_refresh_port_work);
2669 if (srpt_refresh_port(sport)) {
2670 printk(KERN_ERR PFX "MAD registration failed"
2671 " for %s-%d.\n", sdev->device->name, i);
2672 goto err_refresh_port;
2681 scst_unregister(sdev->scst_tgt);
2683 ib_set_client_data(device, &srpt_client, NULL);
2684 list_del(&sdev->list);
2685 srpt_free_ioctx_ring(sdev);
2687 ib_unregister_event_handler(&sdev->event_handler);
2689 ib_destroy_cm_id(sdev->cm_id);
2691 ib_destroy_srq(sdev->srq);
2693 ib_dereg_mr(sdev->mr);
2695 ib_dealloc_pd(sdev->pd);
2697 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2698 class_device_unregister(&sdev->class_dev);
2700 device_unregister(&sdev->dev);
2709 * Callback function invoked by the InfiniBand core either when an InfiniBand
2710 * device is removed or, from ib_unregister_client(), once for each InfiniBand
2711 * device that is still registered.
2713 static void srpt_remove_one(struct ib_device *device)
2716 struct srpt_device *sdev;
2720 sdev = ib_get_client_data(device, &srpt_client);
2721 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 18)
2731 * Cancel the work if it is queued. Wait until srpt_refresh_port_work()
2732 * has finished if it is running.
2734 for (i = 0; i < sdev->device->phys_port_cnt; i++)
2735 cancel_work_sync(&sdev->port[i].work);
2737 scst_unregister(sdev->scst_tgt);
2738 sdev->scst_tgt = NULL;
2740 ib_unregister_event_handler(&sdev->event_handler);
2741 ib_destroy_cm_id(sdev->cm_id);
2742 ib_destroy_srq(sdev->srq);
2743 ib_dereg_mr(sdev->mr);
2744 ib_dealloc_pd(sdev->pd);
2745 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 26)
2746 class_device_unregister(&sdev->class_dev);
2748 device_unregister(&sdev->dev);
2751 srpt_free_ioctx_ring(sdev);
2752 list_del(&sdev->list);
2759 * Module initialization.
2761 * Note: since ib_register_client() registers callback functions, and since at
2762 * least one of these callback functions (srpt_add_one()) calls SCST functions,
2763 * the SCST target template must be registered before ib_register_client() is called.
2766 static int __init srpt_init_module(void)
2770 INIT_LIST_HEAD(&srpt_devices);
2772 ret = class_register(&srpt_class);
2774 printk(KERN_ERR PFX "couldn't register class infiniband_srpt\n");
2778 ret = scst_register_target_template(&srpt_template);
2780 printk(KERN_ERR PFX "couldn't register with scst\n");
2785 ret = ib_register_client(&srpt_client);
2787 printk(KERN_ERR PFX "couldn't register IB client\n");
2792 spin_lock_init(&srpt_thread.thread_lock);
2793 INIT_LIST_HEAD(&srpt_thread.thread_ioctx_list);
2794 srpt_thread.thread = kthread_run(srpt_ioctx_thread,
2795 NULL, "srpt_thread");
2796 if (IS_ERR(srpt_thread.thread)) {
2797 srpt_thread.thread = NULL;
2805 scst_unregister_target_template(&srpt_template);
2807 class_unregister(&srpt_class);
2811 static void __exit srpt_cleanup_module(void)
2815 if (srpt_thread.thread)
2816 kthread_stop(srpt_thread.thread);
2817 ib_unregister_client(&srpt_client);
2818 scst_unregister_target_template(&srpt_template);
2819 class_unregister(&srpt_class);
2824 module_init(srpt_init_module);
2825 module_exit(srpt_cleanup_module);