/*
 * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
/* Licence declaration for this file: GPL version 2 or any later version */
FILE_LICENCE ( GPL2_OR_LATER );
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <gpxe/list.h>
#include <gpxe/if_arp.h>
#include <gpxe/netdevice.h>
#include <gpxe/iobuf.h>
#include <gpxe/ipoib.h>
#include <gpxe/process.h>
#include <gpxe/infiniband.h>
#include <gpxe/ib_gma.h>
44 /** List of Infiniband devices */
45 struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );
/***************************************************************************
 *
 * Completion queues
 *
 ***************************************************************************
 */
55 * Create completion queue
57 * @v ibdev Infiniband device
58 * @v num_cqes Number of completion queue entries
59 * @v op Completion queue operations
60 * @ret cq New completion queue
62 struct ib_completion_queue *
63 ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes,
64 struct ib_completion_queue_operations *op ) {
65 struct ib_completion_queue *cq;
68 DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev );
70 /* Allocate and initialise data structure */
71 cq = zalloc ( sizeof ( *cq ) );
75 list_add ( &cq->list, &ibdev->cqs );
76 cq->num_cqes = num_cqes;
77 INIT_LIST_HEAD ( &cq->work_queues );
80 /* Perform device-specific initialisation and get CQN */
81 if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) {
82 DBGC ( ibdev, "IBDEV %p could not initialise completion "
83 "queue: %s\n", ibdev, strerror ( rc ) );
84 goto err_dev_create_cq;
87 DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) "
88 "with CQN %#lx\n", ibdev, num_cqes, cq,
89 ib_cq_get_drvdata ( cq ), cq->cqn );
92 ibdev->op->destroy_cq ( ibdev, cq );
94 list_del ( &cq->list );
101 * Destroy completion queue
103 * @v ibdev Infiniband device
104 * @v cq Completion queue
106 void ib_destroy_cq ( struct ib_device *ibdev,
107 struct ib_completion_queue *cq ) {
108 DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n",
110 assert ( list_empty ( &cq->work_queues ) );
111 ibdev->op->destroy_cq ( ibdev, cq );
112 list_del ( &cq->list );
117 * Poll completion queue
119 * @v ibdev Infiniband device
120 * @v cq Completion queue
122 void ib_poll_cq ( struct ib_device *ibdev,
123 struct ib_completion_queue *cq ) {
124 struct ib_work_queue *wq;
126 /* Poll completion queue */
127 ibdev->op->poll_cq ( ibdev, cq );
129 /* Refill receive work queues */
130 list_for_each_entry ( wq, &cq->work_queues, list ) {
132 ib_refill_recv ( ibdev, wq->qp );
/***************************************************************************
 *
 * Work queues
 *
 ***************************************************************************
 */
146 * @v ibdev Infiniband device
147 * @v type Queue pair type
148 * @v num_send_wqes Number of send work queue entries
149 * @v send_cq Send completion queue
150 * @v num_recv_wqes Number of receive work queue entries
151 * @v recv_cq Receive completion queue
155 struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
156 enum ib_queue_pair_type type,
157 unsigned int num_send_wqes,
158 struct ib_completion_queue *send_cq,
159 unsigned int num_recv_wqes,
160 struct ib_completion_queue *recv_cq,
161 unsigned long qkey ) {
162 struct ib_queue_pair *qp;
166 DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev );
168 /* Allocate and initialise data structure */
169 total_size = ( sizeof ( *qp ) +
170 ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
171 ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
172 qp = zalloc ( total_size );
176 list_add ( &qp->list, &ibdev->qps );
180 qp->send.is_send = 1;
181 qp->send.cq = send_cq;
182 list_add ( &qp->send.list, &send_cq->work_queues );
183 qp->send.num_wqes = num_send_wqes;
184 qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
186 qp->recv.cq = recv_cq;
187 list_add ( &qp->recv.list, &recv_cq->work_queues );
188 qp->recv.num_wqes = num_recv_wqes;
189 qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) +
190 ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ));
191 INIT_LIST_HEAD ( &qp->mgids );
193 /* Perform device-specific initialisation and get QPN */
194 if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) {
195 DBGC ( ibdev, "IBDEV %p could not initialise queue pair: "
196 "%s\n", ibdev, strerror ( rc ) );
197 goto err_dev_create_qp;
199 DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n",
200 ibdev, qp, ib_qp_get_drvdata ( qp ), qp->qpn );
201 DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n",
202 ibdev, qp->qpn, num_send_wqes, qp->send.iobufs,
204 DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n",
205 ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs,
206 ( ( ( void * ) qp ) + total_size ) );
208 /* Calculate externally-visible QPN */
211 qp->ext_qpn = IB_QPN_SMA;
214 qp->ext_qpn = IB_QPN_GMA;
217 qp->ext_qpn = qp->qpn;
220 if ( qp->ext_qpn != qp->qpn ) {
221 DBGC ( ibdev, "IBDEV %p QPN %#lx has external QPN %#lx\n",
222 ibdev, qp->qpn, qp->ext_qpn );
227 ibdev->op->destroy_qp ( ibdev, qp );
229 list_del ( &qp->send.list );
230 list_del ( &qp->recv.list );
231 list_del ( &qp->list );
240 * @v ibdev Infiniband device
242 * @v mod_list Modification list
243 * @v qkey New queue key, if applicable
244 * @ret rc Return status code
246 int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp,
247 unsigned long mod_list, unsigned long qkey ) {
250 DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn );
252 if ( mod_list & IB_MODIFY_QKEY )
255 if ( ( rc = ibdev->op->modify_qp ( ibdev, qp, mod_list ) ) != 0 ) {
256 DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n",
257 ibdev, qp->qpn, strerror ( rc ) );
267 * @v ibdev Infiniband device
270 void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
271 struct io_buffer *iobuf;
274 DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n",
277 assert ( list_empty ( &qp->mgids ) );
279 /* Perform device-specific destruction */
280 ibdev->op->destroy_qp ( ibdev, qp );
282 /* Complete any remaining I/O buffers with errors */
283 for ( i = 0 ; i < qp->send.num_wqes ; i++ ) {
284 if ( ( iobuf = qp->send.iobufs[i] ) != NULL )
285 ib_complete_send ( ibdev, qp, iobuf, -ECANCELED );
287 for ( i = 0 ; i < qp->recv.num_wqes ; i++ ) {
288 if ( ( iobuf = qp->recv.iobufs[i] ) != NULL ) {
289 ib_complete_recv ( ibdev, qp, NULL, iobuf,
294 /* Remove work queues from completion queue */
295 list_del ( &qp->send.list );
296 list_del ( &qp->recv.list );
299 list_del ( &qp->list );
304 * Find queue pair by QPN
306 * @v ibdev Infiniband device
307 * @v qpn Queue pair number
308 * @ret qp Queue pair, or NULL
310 struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev,
311 unsigned long qpn ) {
312 struct ib_queue_pair *qp;
314 list_for_each_entry ( qp, &ibdev->qps, list ) {
315 if ( ( qpn == qp->qpn ) || ( qpn == qp->ext_qpn ) )
322 * Find queue pair by multicast GID
324 * @v ibdev Infiniband device
325 * @v gid Multicast GID
326 * @ret qp Queue pair, or NULL
328 struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev,
329 struct ib_gid *gid ) {
330 struct ib_queue_pair *qp;
331 struct ib_multicast_gid *mgid;
333 list_for_each_entry ( qp, &ibdev->qps, list ) {
334 list_for_each_entry ( mgid, &qp->mgids, list ) {
335 if ( memcmp ( &mgid->gid, gid,
336 sizeof ( mgid->gid ) ) == 0 ) {
345 * Find work queue belonging to completion queue
347 * @v cq Completion queue
348 * @v qpn Queue pair number
349 * @v is_send Find send work queue (rather than receive)
350 * @ret wq Work queue, or NULL if not found
352 struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
353 unsigned long qpn, int is_send ) {
354 struct ib_work_queue *wq;
356 list_for_each_entry ( wq, &cq->work_queues, list ) {
357 if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) )
364 * Post send work queue entry
366 * @v ibdev Infiniband device
368 * @v av Address vector
369 * @v iobuf I/O buffer
370 * @ret rc Return status code
372 int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
373 struct ib_address_vector *av,
374 struct io_buffer *iobuf ) {
377 /* Check queue fill level */
378 if ( qp->send.fill >= qp->send.num_wqes ) {
379 DBGC ( ibdev, "IBDEV %p QPN %#lx send queue full\n",
384 /* Fill in optional parameters in address vector */
388 av->rate = IB_RATE_2_5;
390 /* Post to hardware */
391 if ( ( rc = ibdev->op->post_send ( ibdev, qp, av, iobuf ) ) != 0 ) {
392 DBGC ( ibdev, "IBDEV %p QPN %#lx could not post send WQE: "
393 "%s\n", ibdev, qp->qpn, strerror ( rc ) );
402 * Post receive work queue entry
404 * @v ibdev Infiniband device
406 * @v iobuf I/O buffer
407 * @ret rc Return status code
409 int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
410 struct io_buffer *iobuf ) {
413 /* Check packet length */
414 if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) {
415 DBGC ( ibdev, "IBDEV %p QPN %#lx wrong RX buffer size (%zd)\n",
416 ibdev, qp->qpn, iob_tailroom ( iobuf ) );
420 /* Check queue fill level */
421 if ( qp->recv.fill >= qp->recv.num_wqes ) {
422 DBGC ( ibdev, "IBDEV %p QPN %#lx receive queue full\n",
427 /* Post to hardware */
428 if ( ( rc = ibdev->op->post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
429 DBGC ( ibdev, "IBDEV %p QPN %#lx could not post receive WQE: "
430 "%s\n", ibdev, qp->qpn, strerror ( rc ) );
439 * Complete send work queue entry
441 * @v ibdev Infiniband device
443 * @v iobuf I/O buffer
444 * @v rc Completion status code
446 void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
447 struct io_buffer *iobuf, int rc ) {
449 if ( qp->send.cq->op->complete_send ) {
450 qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc );
458 * Complete receive work queue entry
460 * @v ibdev Infiniband device
462 * @v av Address vector
463 * @v iobuf I/O buffer
464 * @v rc Completion status code
466 void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
467 struct ib_address_vector *av,
468 struct io_buffer *iobuf, int rc ) {
470 if ( qp->recv.cq->op->complete_recv ) {
471 qp->recv.cq->op->complete_recv ( ibdev, qp, av, iobuf, rc );
479 * Refill receive work queue
481 * @v ibdev Infiniband device
484 void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
485 struct io_buffer *iobuf;
488 /* Keep filling while unfilled entries remain */
489 while ( qp->recv.fill < qp->recv.num_wqes ) {
491 /* Allocate I/O buffer */
492 iobuf = alloc_iob ( IB_MAX_PAYLOAD_SIZE );
494 /* Non-fatal; we will refill on next attempt */
498 /* Post I/O buffer */
499 if ( ( rc = ib_post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
500 DBGC ( ibdev, "IBDEV %p could not refill: %s\n",
501 ibdev, strerror ( rc ) );
/***************************************************************************
 *
 * Link control
 *
 ***************************************************************************
 */
519 * @v ibdev Infiniband device
520 * @ret rc Return status code
522 int ib_open ( struct ib_device *ibdev ) {
525 /* Increment device open request counter */
526 if ( ibdev->open_count++ > 0 ) {
527 /* Device was already open; do nothing */
531 /* Create subnet management agent */
532 ibdev->sma = ib_create_gma ( ibdev, IB_QPT_SMA );
533 if ( ! ibdev->sma ) {
534 DBGC ( ibdev, "IBDEV %p could not create SMA\n", ibdev );
539 /* Create general management agent */
540 ibdev->gma = ib_create_gma ( ibdev, IB_QPT_GMA );
541 if ( ! ibdev->gma ) {
542 DBGC ( ibdev, "IBDEV %p could not create GMA\n", ibdev );
548 if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 ) {
549 DBGC ( ibdev, "IBDEV %p could not open: %s\n",
550 ibdev, strerror ( rc ) );
554 assert ( ibdev->open_count == 1 );
557 ibdev->op->close ( ibdev );
559 ib_destroy_gma ( ibdev->gma );
561 ib_destroy_gma ( ibdev->sma );
563 assert ( ibdev->open_count == 1 );
564 ibdev->open_count = 0;
571 * @v ibdev Infiniband device
573 void ib_close ( struct ib_device *ibdev ) {
575 /* Decrement device open request counter */
578 /* Close device if this was the last remaining requested opening */
579 if ( ibdev->open_count == 0 ) {
580 ib_destroy_gma ( ibdev->gma );
581 ib_destroy_gma ( ibdev->sma );
582 ibdev->op->close ( ibdev );
/***************************************************************************
 *
 * Multicast
 *
 ***************************************************************************
 */
594 * Attach to multicast group
596 * @v ibdev Infiniband device
598 * @v gid Multicast GID
599 * @ret rc Return status code
601 * Note that this function handles only the local device's attachment
602 * to the multicast GID; it does not issue the relevant MADs to join
603 * the multicast group on the subnet.
605 int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
606 struct ib_gid *gid ) {
607 struct ib_multicast_gid *mgid;
610 /* Add to software multicast GID list */
611 mgid = zalloc ( sizeof ( *mgid ) );
616 memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) );
617 list_add ( &mgid->list, &qp->mgids );
619 /* Add to hardware multicast GID list */
620 if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 )
621 goto err_dev_mcast_attach;
625 err_dev_mcast_attach:
626 list_del ( &mgid->list );
633 * Detach from multicast group
635 * @v ibdev Infiniband device
637 * @v gid Multicast GID
639 void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
640 struct ib_gid *gid ) {
641 struct ib_multicast_gid *mgid;
643 /* Remove from hardware multicast GID list */
644 ibdev->op->mcast_detach ( ibdev, qp, gid );
646 /* Remove from software multicast GID list */
647 list_for_each_entry ( mgid, &qp->mgids, list ) {
648 if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) {
649 list_del ( &mgid->list );
/***************************************************************************
 *
 * Miscellaneous
 *
 ***************************************************************************
 */
664 * Get Infiniband HCA information
666 * @v ibdev Infiniband device
667 * @ret hca_guid HCA GUID
668 * @ret num_ports Number of ports
670 int ib_get_hca_info ( struct ib_device *ibdev,
671 struct ib_gid_half *hca_guid ) {
672 struct ib_device *tmp;
675 /* Search for IB devices with the same physical device to
676 * identify port count and a suitable Node GUID.
678 for_each_ibdev ( tmp ) {
679 if ( tmp->dev != ibdev->dev )
681 if ( num_ports == 0 ) {
682 memcpy ( hca_guid, &tmp->gid.u.half[1],
683 sizeof ( *hca_guid ) );
690 /** Set port information
692 * @v ibdev Infiniband device
693 * @v port_info New port information
695 int ib_set_port_info ( struct ib_device *ibdev,
696 const struct ib_port_info *port_info ) {
699 /* Adapters with embedded SMAs do not need to support this method */
700 if ( ! ibdev->op->set_port_info ) {
701 DBGC ( ibdev, "IBDEV %p does not support setting port "
702 "information\n", ibdev );
706 if ( ( rc = ibdev->op->set_port_info ( ibdev, port_info ) ) != 0 ) {
707 DBGC ( ibdev, "IBDEV %p could not set port information: %s\n",
708 ibdev, strerror ( rc ) );
/***************************************************************************
 *
 * Event queues
 *
 ***************************************************************************
 */
/**
 * Handle Infiniband link state change
 *
 * @v ibdev		Infiniband device
 *
 * Currently just forwards the notification to the IPoIB layer.
 */
void ib_link_state_changed ( struct ib_device *ibdev ) {

	/* Notify IPoIB of link state change */
	ipoib_link_state_changed ( ibdev );
}
736 * @v ibdev Infiniband device
738 void ib_poll_eq ( struct ib_device *ibdev ) {
739 struct ib_completion_queue *cq;
741 /* Poll device's event queue */
742 ibdev->op->poll_eq ( ibdev );
744 /* Poll all completion queues */
745 list_for_each_entry ( cq, &ibdev->cqs, list )
746 ib_poll_cq ( ibdev, cq );
750 * Single-step the Infiniband event queue
752 * @v process Infiniband event queue process
754 static void ib_step ( struct process *process __unused ) {
755 struct ib_device *ibdev;
757 for_each_ibdev ( ibdev )
758 ib_poll_eq ( ibdev );
761 /** Infiniband event queue process */
762 struct process ib_process __permanent_process = {
/***************************************************************************
 *
 * Infiniband device creation/destruction
 *
 ***************************************************************************
 */
774 * Allocate Infiniband device
776 * @v priv_size Size of driver private data area
777 * @ret ibdev Infiniband device, or NULL
779 struct ib_device * alloc_ibdev ( size_t priv_size ) {
780 struct ib_device *ibdev;
784 total_len = ( sizeof ( *ibdev ) + priv_size );
785 ibdev = zalloc ( total_len );
787 drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) );
788 ib_set_drvdata ( ibdev, drv_priv );
789 INIT_LIST_HEAD ( &ibdev->cqs );
790 INIT_LIST_HEAD ( &ibdev->qps );
791 ibdev->lid = IB_LID_NONE;
792 ibdev->pkey = IB_PKEY_NONE;
798 * Register Infiniband device
800 * @v ibdev Infiniband device
801 * @ret rc Return status code
803 int register_ibdev ( struct ib_device *ibdev ) {
806 /* Add to device list */
808 list_add_tail ( &ibdev->list, &ib_devices );
810 /* Add IPoIB device */
811 if ( ( rc = ipoib_probe ( ibdev ) ) != 0 ) {
812 DBGC ( ibdev, "IBDEV %p could not add IPoIB device: %s\n",
813 ibdev, strerror ( rc ) );
814 goto err_ipoib_probe;
817 DBGC ( ibdev, "IBDEV %p registered (phys %s)\n", ibdev,
822 list_del ( &ibdev->list );
828 * Unregister Infiniband device
830 * @v ibdev Infiniband device
832 void unregister_ibdev ( struct ib_device *ibdev ) {
835 ipoib_remove ( ibdev );
837 /* Remove from device list */
838 list_del ( &ibdev->list );
840 DBGC ( ibdev, "IBDEV %p unregistered\n", ibdev );