2 * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or any later version.
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 FILE_LICENCE ( GPL2_OR_LATER );
29 #include <gpxe/list.h>
30 #include <gpxe/if_arp.h>
31 #include <gpxe/netdevice.h>
32 #include <gpxe/iobuf.h>
33 #include <gpxe/ipoib.h>
34 #include <gpxe/process.h>
35 #include <gpxe/infiniband.h>
43 /** List of Infiniband devices */
/*
 * Global list head for Infiniband devices, statically initialised with
 * LIST_HEAD_INIT.  Within this file register_ibdev() appends devices to it
 * with list_add_tail() and unregister_ibdev() unlinks them with list_del().
 * NOTE(review): for_each_ibdev() is defined elsewhere and presumably walks
 * this list -- confirm.
 */
44 struct list_head ib_devices = LIST_HEAD_INIT ( ib_devices );
46 /***************************************************************************
50 ***************************************************************************
54 * Create completion queue
56 * @v ibdev Infiniband device
57 * @v num_cqes Number of completion queue entries
58 * @v op Completion queue operations
59 * @ret cq New completion queue
/*
 * ib_create_cq: allocate a zeroed completion queue with zalloc(), link it
 * into ibdev->cqs, record num_cqes, initialise its (empty) work queue list
 * and then hand it to the device create_cq operation.  A non-zero status
 * from that operation is logged and control jumps to err_dev_create_cq.
 *
 * NOTE(review): the embedded line numbers skip (64 -> 67, 70 -> 74,
 * 76 -> 79, 88 -> 91, 93 -> 100), so parts of this function are not visible
 * in this listing: the declaration of rc, any check of the zalloc() result,
 * any use of the op parameter, the return statements and the
 * err_dev_create_cq label.  Confirm against the complete file before
 * relying on this description.
 */
61 struct ib_completion_queue *
62 ib_create_cq ( struct ib_device *ibdev, unsigned int num_cqes,
63 struct ib_completion_queue_operations *op ) {
64 struct ib_completion_queue *cq;
67 DBGC ( ibdev, "IBDEV %p creating completion queue\n", ibdev );
69 /* Allocate and initialise data structure */
70 cq = zalloc ( sizeof ( *cq ) );
/* The queue is linked into the device list before the device-specific
 * initialisation runs, so the failure path has to unlink it again.
 */
74 list_add ( &cq->list, &ibdev->cqs );
75 cq->num_cqes = num_cqes;
76 INIT_LIST_HEAD ( &cq->work_queues );
79 /* Perform device-specific initialisation and get CQN */
80 if ( ( rc = ibdev->op->create_cq ( ibdev, cq ) ) != 0 ) {
81 DBGC ( ibdev, "IBDEV %p could not initialise completion "
82 "queue: %s\n", ibdev, strerror ( rc ) );
83 goto err_dev_create_cq;
86 DBGC ( ibdev, "IBDEV %p created %d-entry completion queue %p (%p) "
87 "with CQN %#lx\n", ibdev, num_cqes, cq,
88 ib_cq_get_drvdata ( cq ), cq->cqn );
/* NOTE(review): the two calls below look like the error-unwind sequence
 * (device destroy, then unlink); the surrounding labels and the return
 * that separates them from the success path are not visible -- confirm.
 */
91 ibdev->op->destroy_cq ( ibdev, cq );
93 list_del ( &cq->list );
100 * Destroy completion queue
102 * @v ibdev Infiniband device
103 * @v cq Completion queue
/*
 * ib_destroy_cq: log the request, assert that no work queues are still
 * attached to the completion queue, call the device destroy_cq operation
 * and unlink the queue from the list it was added to.
 *
 * NOTE(review): line 108 (the remaining DBGC arguments) and lines 112+ are
 * missing from this listing, so whether the structure is freed here cannot
 * be seen -- confirm against the complete file.
 */
105 void ib_destroy_cq ( struct ib_device *ibdev,
106 struct ib_completion_queue *cq ) {
107 DBGC ( ibdev, "IBDEV %p destroying completion queue %#lx\n",
/* Destroying a queue that still has work queues attached is treated as a
 * programming error (assert), not as a runtime condition.
 */
109 assert ( list_empty ( &cq->work_queues ) );
110 ibdev->op->destroy_cq ( ibdev, cq );
111 list_del ( &cq->list );
116 * Poll completion queue
118 * @v ibdev Infiniband device
119 * @v cq Completion queue
/*
 * ib_poll_cq: call the device poll_cq operation for this completion queue,
 * then walk every work queue attached to it and call ib_refill_recv() on
 * the owning queue pair.
 *
 * NOTE(review): line 130, between the loop header and the refill call, is
 * missing from this listing; it may hold a condition restricting the
 * refill (for example to receive work queues only) -- confirm.
 */
121 void ib_poll_cq ( struct ib_device *ibdev,
122 struct ib_completion_queue *cq ) {
123 struct ib_work_queue *wq;
125 /* Poll completion queue */
126 ibdev->op->poll_cq ( ibdev, cq );
128 /* Refill receive work queues */
129 list_for_each_entry ( wq, &cq->work_queues, list ) {
131 ib_refill_recv ( ibdev, wq->qp );
135 /***************************************************************************
139 ***************************************************************************
145 * @v ibdev Infiniband device
146 * @v num_send_wqes Number of send work queue entries
147 * @v send_cq Send completion queue
148 * @v num_recv_wqes Number of receive work queue entries
149 * @v recv_cq Receive completion queue
/*
 * ib_create_qp: build a queue pair in a single zeroed allocation that holds
 * the structure itself followed by the send and receive I/O buffer pointer
 * arrays.  The queue pair is linked into ibdev->qps, its send and receive
 * work queues are linked into the work queue lists of send_cq and recv_cq,
 * the multicast GID list is initialised, and finally the device create_qp
 * operation is called.  A non-zero status from that operation is logged and
 * control jumps to err_dev_create_qp.
 *
 * NOTE(review): the embedded line numbers skip in several places
 * (159 -> 163, 169 -> 173, 173 -> 176, 193 -> 196, 203 -> 206, 210 -> 219).
 * Not visible here: the declarations of total_size and rc, any check of
 * the zalloc() result, any use of the qkey parameter, the return
 * statements and the error labels.  Confirm against the complete file.
 */
153 struct ib_queue_pair * ib_create_qp ( struct ib_device *ibdev,
154 unsigned int num_send_wqes,
155 struct ib_completion_queue *send_cq,
156 unsigned int num_recv_wqes,
157 struct ib_completion_queue *recv_cq,
158 unsigned long qkey ) {
159 struct ib_queue_pair *qp;
163 DBGC ( ibdev, "IBDEV %p creating queue pair\n", ibdev );
165 /* Allocate and initialise data structure */
166 total_size = ( sizeof ( *qp ) +
167 ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ) +
168 ( num_recv_wqes * sizeof ( qp->recv.iobufs[0] ) ) );
169 qp = zalloc ( total_size );
173 list_add ( &qp->list, &ibdev->qps );
176 qp->send.is_send = 1;
177 qp->send.cq = send_cq;
178 list_add ( &qp->send.list, &send_cq->work_queues );
179 qp->send.num_wqes = num_send_wqes;
/* The send I/O buffer array starts immediately after the structure,
 * inside the same allocation.
 */
180 qp->send.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) );
182 qp->recv.cq = recv_cq;
183 list_add ( &qp->recv.list, &recv_cq->work_queues );
184 qp->recv.num_wqes = num_recv_wqes;
/* The receive I/O buffer array follows the send array. */
185 qp->recv.iobufs = ( ( ( void * ) qp ) + sizeof ( *qp ) +
186 ( num_send_wqes * sizeof ( qp->send.iobufs[0] ) ));
187 INIT_LIST_HEAD ( &qp->mgids );
189 /* Perform device-specific initialisation and get QPN */
190 if ( ( rc = ibdev->op->create_qp ( ibdev, qp ) ) != 0 ) {
191 DBGC ( ibdev, "IBDEV %p could not initialise queue pair: "
192 "%s\n", ibdev, strerror ( rc ) );
193 goto err_dev_create_qp;
196 DBGC ( ibdev, "IBDEV %p created queue pair %p (%p) with QPN %#lx\n",
197 ibdev, qp, ib_qp_get_drvdata ( qp ), qp->qpn );
198 DBGC ( ibdev, "IBDEV %p QPN %#lx has %d send entries at [%p,%p)\n",
199 ibdev, qp->qpn, num_send_wqes, qp->send.iobufs,
201 DBGC ( ibdev, "IBDEV %p QPN %#lx has %d receive entries at [%p,%p)\n",
202 ibdev, qp->qpn, num_recv_wqes, qp->recv.iobufs,
203 ( ( ( void * ) qp ) + total_size ) );
/* NOTE(review): the calls below look like the error-unwind sequence
 * (device destroy, then removal from both completion queue lists and from
 * the device list); the labels and the success return are not visible --
 * confirm.
 */
206 ibdev->op->destroy_qp ( ibdev, qp );
208 list_del ( &qp->send.list );
209 list_del ( &qp->recv.list );
210 list_del ( &qp->list );
219 * @v ibdev Infiniband device
221 * @v mod_list Modification list
222 * @v qkey New queue key, if applicable
223 * @ret rc Return status code
/*
 * ib_modify_qp: log the request, test the IB_MODIFY_QKEY bit of mod_list,
 * then call the device modify_qp operation and log a non-zero status.
 *
 * NOTE(review): lines 227-228, 232-233 and 237+ are missing from this
 * listing: the declaration of rc, the statement controlled by the
 * IB_MODIFY_QKEY test (presumably where the qkey parameter is applied) and
 * the return statements -- confirm against the complete file.
 */
225 int ib_modify_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp,
226 unsigned long mod_list, unsigned long qkey ) {
229 DBGC ( ibdev, "IBDEV %p modifying QPN %#lx\n", ibdev, qp->qpn );
231 if ( mod_list & IB_MODIFY_QKEY )
234 if ( ( rc = ibdev->op->modify_qp ( ibdev, qp, mod_list ) ) != 0 ) {
235 DBGC ( ibdev, "IBDEV %p could not modify QPN %#lx: %s\n",
236 ibdev, qp->qpn, strerror ( rc ) );
246 * @v ibdev Infiniband device
/*
 * ib_destroy_qp: tear down a queue pair.  Order of the visible steps:
 *   1. assert that no multicast GIDs are still attached;
 *   2. call the device destroy_qp operation;
 *   3. complete every I/O buffer still recorded in the send array via
 *      ib_complete_send() with -ECANCELED, and every one still recorded in
 *      the receive array via ib_complete_recv() with a NULL address vector;
 *   4. unlink both work queues from their completion queues;
 *   5. unlink the queue pair from the list it was added to.
 *
 * NOTE(review): several lines are missing from this listing: the
 * declaration of the loop index i, the remaining DBGC arguments, the status
 * argument passed to ib_complete_recv() (line 269) and everything after
 * line 278 (so any freeing of the structure is not visible) -- confirm.
 */
249 void ib_destroy_qp ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
250 struct io_buffer *iobuf;
253 DBGC ( ibdev, "IBDEV %p destroying QPN %#lx\n",
256 assert ( list_empty ( &qp->mgids ) );
258 /* Perform device-specific destruction */
259 ibdev->op->destroy_qp ( ibdev, qp );
261 /* Complete any remaining I/O buffers with errors */
262 for ( i = 0 ; i < qp->send.num_wqes ; i++ ) {
263 if ( ( iobuf = qp->send.iobufs[i] ) != NULL )
264 ib_complete_send ( ibdev, qp, iobuf, -ECANCELED );
266 for ( i = 0 ; i < qp->recv.num_wqes ; i++ ) {
267 if ( ( iobuf = qp->recv.iobufs[i] ) != NULL ) {
268 ib_complete_recv ( ibdev, qp, NULL, iobuf,
273 /* Remove work queues from completion queue */
274 list_del ( &qp->send.list );
275 list_del ( &qp->recv.list );
278 list_del ( &qp->list );
283 * Find queue pair by QPN
285 * @v ibdev Infiniband device
286 * @v qpn Queue pair number
287 * @ret qp Queue pair, or NULL
/*
 * ib_find_qp_qpn: linear search of ibdev->qps for a queue pair whose qpn
 * field equals the requested queue pair number.
 *
 * NOTE(review): lines 295+ (the statement taken on a match and the
 * not-found return) are missing from this listing; the doc comment above
 * the function states the result is the queue pair or NULL.
 */
289 struct ib_queue_pair * ib_find_qp_qpn ( struct ib_device *ibdev,
290 unsigned long qpn ) {
291 struct ib_queue_pair *qp;
293 list_for_each_entry ( qp, &ibdev->qps, list ) {
294 if ( qp->qpn == qpn )
301 * Find queue pair by multicast GID
303 * @v ibdev Infiniband device
304 * @v gid Multicast GID
305 * @ret qp Queue pair, or NULL
/*
 * ib_find_qp_mgid: nested linear search.  For every queue pair on
 * ibdev->qps, every entry on that queue pair's mgids list is compared
 * byte-for-byte (memcmp over sizeof ( mgid->gid )) with the requested GID.
 *
 * NOTE(review): lines 316+ (the statement taken on a match and the
 * not-found return) are missing from this listing; the doc comment above
 * the function states the result is the queue pair or NULL.
 */
307 struct ib_queue_pair * ib_find_qp_mgid ( struct ib_device *ibdev,
308 struct ib_gid *gid ) {
309 struct ib_queue_pair *qp;
310 struct ib_multicast_gid *mgid;
312 list_for_each_entry ( qp, &ibdev->qps, list ) {
313 list_for_each_entry ( mgid, &qp->mgids, list ) {
314 if ( memcmp ( &mgid->gid, gid,
315 sizeof ( mgid->gid ) ) == 0 ) {
324 * Find work queue belonging to completion queue
326 * @v cq Completion queue
327 * @v qpn Queue pair number
328 * @v is_send Find send work queue (rather than receive)
329 * @ret wq Work queue, or NULL if not found
/*
 * ib_find_wq: linear search of the work queues attached to a completion
 * queue for the one that belongs to queue pair number qpn and has the
 * requested direction.
 *
 * The direction is matched with ==, not by truthiness, so is_send must be
 * exactly the stored value.  ib_create_qp() stores 1 for send work queues;
 * NOTE(review): receive work queues presumably keep the 0 left by zalloc()
 * -- confirm, since part of ib_create_qp() is not visible.
 *
 * NOTE(review): lines 337+ (the statement taken on a match and the
 * not-found return) are missing from this listing.
 */
331 struct ib_work_queue * ib_find_wq ( struct ib_completion_queue *cq,
332 unsigned long qpn, int is_send ) {
333 struct ib_work_queue *wq;
335 list_for_each_entry ( wq, &cq->work_queues, list ) {
336 if ( ( wq->qp->qpn == qpn ) && ( wq->is_send == is_send ) )
343 * Post send work queue entry
345 * @v ibdev Infiniband device
347 * @v av Address vector
348 * @v iobuf I/O buffer
349 * @ret rc Return status code
/*
 * ib_post_send: post an I/O buffer on the send work queue.  The request is
 * rejected (and logged) when the send fill level has reached num_wqes;
 * otherwise the buffer is passed to the device post_send operation and a
 * non-zero status is logged.
 *
 * NOTE(review): lines 354-355, 359-362 and 367+ are missing from this
 * listing: the declaration of rc, the error code returned for a full
 * queue, any update of qp->send.fill after a successful post and the
 * return statements -- confirm against the complete file.
 */
351 int ib_post_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
352 struct ib_address_vector *av,
353 struct io_buffer *iobuf ) {
356 /* Check queue fill level */
357 if ( qp->send.fill >= qp->send.num_wqes ) {
358 DBGC ( ibdev, "IBDEV %p QPN %#lx send queue full\n",
363 /* Post to hardware */
364 if ( ( rc = ibdev->op->post_send ( ibdev, qp, av, iobuf ) ) != 0 ) {
365 DBGC ( ibdev, "IBDEV %p QPN %#lx could not post send WQE: "
366 "%s\n", ibdev, qp->qpn, strerror ( rc ) );
375 * Post receive work queue entry
377 * @v ibdev Infiniband device
379 * @v iobuf I/O buffer
380 * @ret rc Return status code
/*
 * ib_post_recv: post an I/O buffer on the receive work queue.  Two checks
 * run before the device is involved:
 *   - the buffer tailroom must be at least IB_MAX_PAYLOAD_SIZE, otherwise
 *     the size is logged as wrong;
 *   - the receive fill level must be below num_wqes, otherwise the queue
 *     is logged as full.
 * The buffer is then passed to the device post_recv operation and a
 * non-zero status is logged.
 *
 * NOTE(review): lines 384-385, 390-392, 396-399 and 404+ are missing from
 * this listing: the declaration of rc, the error codes returned by the two
 * checks, any update of qp->recv.fill and the return statements -- confirm
 * against the complete file.
 */
382 int ib_post_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
383 struct io_buffer *iobuf ) {
386 /* Check packet length */
387 if ( iob_tailroom ( iobuf ) < IB_MAX_PAYLOAD_SIZE ) {
388 DBGC ( ibdev, "IBDEV %p QPN %#lx wrong RX buffer size (%zd)\n",
389 ibdev, qp->qpn, iob_tailroom ( iobuf ) );
393 /* Check queue fill level */
394 if ( qp->recv.fill >= qp->recv.num_wqes ) {
395 DBGC ( ibdev, "IBDEV %p QPN %#lx receive queue full\n",
400 /* Post to hardware */
401 if ( ( rc = ibdev->op->post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
402 DBGC ( ibdev, "IBDEV %p QPN %#lx could not post receive WQE: "
403 "%s\n", ibdev, qp->qpn, strerror ( rc ) );
412 * Complete send work queue entry
414 * @v ibdev Infiniband device
416 * @v iobuf I/O buffer
417 * @v rc Completion status code
/*
 * ib_complete_send: forward a send completion (buffer plus status code) to
 * the complete_send handler of the completion queue that owns the send
 * work queue of this queue pair.
 *
 * NOTE(review): lines 422+ are missing from this listing, so any
 * bookkeeping after the handler call (for example adjusting
 * qp->send.fill) is not visible -- confirm.
 */
419 void ib_complete_send ( struct ib_device *ibdev, struct ib_queue_pair *qp,
420 struct io_buffer *iobuf, int rc ) {
421 qp->send.cq->op->complete_send ( ibdev, qp, iobuf, rc );
426 * Complete receive work queue entry
428 * @v ibdev Infiniband device
430 * @v av Address vector
431 * @v iobuf I/O buffer
432 * @v rc Completion status code
/*
 * ib_complete_recv: forward a receive completion (address vector, buffer
 * and status code) to the complete_recv handler of the completion queue
 * that owns the receive work queue of this queue pair.  ib_destroy_qp()
 * calls this with a NULL address vector for buffers that never completed.
 *
 * NOTE(review): lines 438+ are missing from this listing, so any
 * bookkeeping after the handler call (for example adjusting
 * qp->recv.fill) is not visible -- confirm.
 */
434 void ib_complete_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp,
435 struct ib_address_vector *av,
436 struct io_buffer *iobuf, int rc ) {
437 qp->recv.cq->op->complete_recv ( ibdev, qp, av, iobuf, rc );
442 * Refill receive work queue
444 * @v ibdev Infiniband device
/*
 * ib_refill_recv: top up the receive work queue.  While the receive fill
 * level is below num_wqes, allocate an I/O buffer of IB_MAX_PAYLOAD_SIZE
 * bytes and post it with ib_post_recv(); a failed post is logged.
 *
 * NOTE(review): lines 449-450, 456, 458-460 and 465+ are missing from this
 * listing: the declaration of rc, the test of the alloc_iob() result (the
 * surviving comment says allocation failure is non-fatal), and what happens
 * to the buffer and the loop after a failed post.  The loop condition only
 * changes if qp->recv.fill is updated somewhere not visible here -- confirm
 * against the complete file.
 */
447 void ib_refill_recv ( struct ib_device *ibdev, struct ib_queue_pair *qp ) {
448 struct io_buffer *iobuf;
451 /* Keep filling while unfilled entries remain */
452 while ( qp->recv.fill < qp->recv.num_wqes ) {
454 /* Allocate I/O buffer */
455 iobuf = alloc_iob ( IB_MAX_PAYLOAD_SIZE );
457 /* Non-fatal; we will refill on next attempt */
461 /* Post I/O buffer */
462 if ( ( rc = ib_post_recv ( ibdev, qp, iobuf ) ) != 0 ) {
463 DBGC ( ibdev, "IBDEV %p could not refill: %s\n",
464 ibdev, strerror ( rc ) );
472 /***************************************************************************
476 ***************************************************************************
482 * @v ibdev Infiniband device
483 * @ret rc Return status code
/*
 * ib_open: reference-counted open.  The device open operation is called
 * only when open_count is zero, i.e. for the first requested opening.
 *
 * NOTE(review): lines 486-487, 491-493 and 495+ are missing from this
 * listing: the declaration of rc, the handling of a non-zero status from
 * the device open operation, the counter increment announced by the
 * surviving comment and the return statement -- confirm.
 */
485 int ib_open ( struct ib_device *ibdev ) {
488 /* Open device if this is the first requested opening */
489 if ( ibdev->open_count == 0 ) {
490 if ( ( rc = ibdev->op->open ( ibdev ) ) != 0 )
494 /* Increment device open request counter */
503 * @v ibdev Infiniband device
/*
 * ib_close: counterpart of ib_open().  The device close operation is called
 * when open_count is zero at the point of the test, i.e. once the last
 * requested opening has been released.
 *
 * NOTE(review): lines 508-509 are missing from this listing; they
 * presumably hold the counter decrement announced by the surviving comment
 * -- confirm.
 */
505 void ib_close ( struct ib_device *ibdev ) {
507 /* Decrement device open request counter */
510 /* Close device if this was the last remaining requested opening */
511 if ( ibdev->open_count == 0 )
512 ibdev->op->close ( ibdev );
515 /***************************************************************************
519 ***************************************************************************
523 * Attach to multicast group
525 * @v ibdev Infiniband device
527 * @v gid Multicast GID
528 * @ret rc Return status code
/*
 * ib_mcast_attach: record a multicast GID against a queue pair and attach
 * it in hardware.  A zeroed tracking entry is allocated, the GID is copied
 * into it and the entry is added to qp->mgids before the device
 * mcast_attach operation runs; if that operation returns non-zero, control
 * jumps to err_dev_mcast_attach, where the entry is unlinked again.
 *
 * NOTE(review): lines 533-534, 537-540, 547-549 and 552+ are missing from
 * this listing: the declaration of rc, any check of the zalloc() result,
 * the success return, and whatever follows the list_del() on the error
 * path (so freeing of the entry is not visible) -- confirm.
 */
530 int ib_mcast_attach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
531 struct ib_gid *gid ) {
532 struct ib_multicast_gid *mgid;
535 /* Add to software multicast GID list */
536 mgid = zalloc ( sizeof ( *mgid ) );
541 memcpy ( &mgid->gid, gid, sizeof ( mgid->gid ) );
542 list_add ( &mgid->list, &qp->mgids );
544 /* Add to hardware multicast GID list */
545 if ( ( rc = ibdev->op->mcast_attach ( ibdev, qp, gid ) ) != 0 )
546 goto err_dev_mcast_attach;
550 err_dev_mcast_attach:
551 list_del ( &mgid->list );
558 * Detach from multicast group
560 * @v ibdev Infiniband device
562 * @v gid Multicast GID
/*
 * ib_mcast_detach: detach a multicast GID from a queue pair.  The device
 * mcast_detach operation is called first; the software list qp->mgids is
 * then searched for an entry whose GID matches byte-for-byte, and that
 * entry is unlinked.
 *
 * NOTE(review): lines 575+ are missing from this listing.  The entry is
 * unlinked from inside a list_for_each_entry() walk, so the loop presumably
 * has to be left straight after the list_del(); that exit, and any freeing
 * of the entry, are not visible -- confirm.
 */
564 void ib_mcast_detach ( struct ib_device *ibdev, struct ib_queue_pair *qp,
565 struct ib_gid *gid ) {
566 struct ib_multicast_gid *mgid;
568 /* Remove from hardware multicast GID list */
569 ibdev->op->mcast_detach ( ibdev, qp, gid );
571 /* Remove from software multicast GID list */
572 list_for_each_entry ( mgid, &qp->mgids, list ) {
573 if ( memcmp ( &mgid->gid, gid, sizeof ( mgid->gid ) ) == 0 ) {
574 list_del ( &mgid->list );
581 /***************************************************************************
585 ***************************************************************************
589 * Get Infiniband HCA information
591 * @v ibdev Infiniband device
592 * @ret hca_guid HCA GUID
593 * @ret num_ports Number of ports
/*
 * ib_get_hca_info: walk all Infiniband devices and consider those whose
 * dev field is the same as that of ibdev (the test on line 604 compares
 * tmp->dev with ibdev->dev).  While num_ports is still zero, the second
 * half of the GID of the device being examined ( gid.u.half[1] ) is copied
 * into hca_guid.
 *
 * NOTE(review): lines 598-599, 605 and 609+ are missing from this listing:
 * the declaration and initial value of num_ports, the statement taken when
 * the dev fields differ (presumably skipping that device), the increment
 * of num_ports and the return statement.  The doc comment above the
 * function lists num_ports as a returned value -- confirm.
 */
595 int ib_get_hca_info ( struct ib_device *ibdev,
596 struct ib_gid_half *hca_guid ) {
597 struct ib_device *tmp;
600 /* Search for IB devices with the same physical device to
601 * identify port count and a suitable Node GUID.
603 for_each_ibdev ( tmp ) {
604 if ( tmp->dev != ibdev->dev )
606 if ( num_ports == 0 ) {
607 memcpy ( hca_guid, &tmp->gid.u.half[1],
608 sizeof ( *hca_guid ) );
615 /***************************************************************************
619 ***************************************************************************
623 * Handle Infiniband link state change
625 * @v ibdev Infiniband device
/*
 * ib_link_state_changed: forward a link state change on ibdev to the IPoIB
 * layer by calling ipoib_link_state_changed().  No other action is visible
 * in this listing.
 */
627 void ib_link_state_changed ( struct ib_device *ibdev ) {
629 /* Notify IPoIB of link state change */
630 ipoib_link_state_changed ( ibdev );
636 * @v ibdev Infiniband device
/*
 * ib_poll_eq: poll one device.  The device poll_eq operation runs first,
 * then ib_poll_cq() is called for every completion queue linked on
 * ibdev->cqs (which in turn polls the queue and refills receive buffers).
 */
638 void ib_poll_eq ( struct ib_device *ibdev ) {
639 struct ib_completion_queue *cq;
641 /* Poll device's event queue */
642 ibdev->op->poll_eq ( ibdev );
644 /* Poll all completion queues */
645 list_for_each_entry ( cq, &ibdev->cqs, list )
646 ib_poll_cq ( ibdev, cq );
650 * Single-step the Infiniband event queue
652 * @v process Infiniband event queue process
/*
 * ib_step: process step function.  Calls ib_poll_eq() once for every
 * Infiniband device visited by for_each_ibdev().  The process argument is
 * not used (it is marked __unused).
 */
654 static void ib_step ( struct process *process __unused ) {
655 struct ib_device *ibdev;
657 for_each_ibdev ( ibdev )
658 ib_poll_eq ( ibdev );
661 /** Infiniband event queue process */
/*
 * Process descriptor for the Infiniband event queue, declared with the
 * __permanent_process attribute.
 * NOTE(review): the initialiser (lines 663-665) is missing from this
 * listing; it presumably installs ib_step() as the step function --
 * confirm.
 */
662 struct process ib_process __permanent_process = {
666 /***************************************************************************
668 * Infiniband device creation/destruction
670 ***************************************************************************
674 * Allocate Infiniband device
676 * @v priv_size Size of driver private data area
677 * @ret ibdev Infiniband device, or NULL
/*
 * alloc_ibdev: allocate a zeroed Infiniband device structure together with
 * priv_size bytes of driver private data in one block.  The private area
 * starts immediately after the structure and is registered with
 * ib_set_drvdata().  The completion queue and queue pair lists are
 * initialised, and lid and pkey are set to IB_LID_NONE and IB_PKEY_NONE.
 *
 * NOTE(review): lines 681-683, 686 and 693+ are missing from this listing:
 * the declarations of drv_priv and total_len, any check of the zalloc()
 * result before the structure is written to, and the return statement (the
 * doc comment above the function says the result may be NULL) -- confirm.
 */
679 struct ib_device * alloc_ibdev ( size_t priv_size ) {
680 struct ib_device *ibdev;
684 total_len = ( sizeof ( *ibdev ) + priv_size );
685 ibdev = zalloc ( total_len );
687 drv_priv = ( ( ( void * ) ibdev ) + sizeof ( *ibdev ) );
688 ib_set_drvdata ( ibdev, drv_priv );
689 INIT_LIST_HEAD ( &ibdev->cqs );
690 INIT_LIST_HEAD ( &ibdev->qps );
691 ibdev->lid = IB_LID_NONE;
692 ibdev->pkey = IB_PKEY_NONE;
698 * Register Infiniband device
700 * @v ibdev Infiniband device
701 * @ret rc Return status code
/*
 * register_ibdev: append the device to the tail of the global ib_devices
 * list, then probe it as an IPoIB device with ipoib_probe().  A non-zero
 * status from the probe is logged and control jumps to err_ipoib_probe;
 * the list_del() on line 722 removes the device from the list again.
 *
 * NOTE(review): lines 704-705, 707, 715-716, 718-721 and 723+ are missing
 * from this listing: the declaration of rc, a statement between the
 * comment and list_add_tail() (line 707), the remaining DBGC arguments,
 * the success return, the err_ipoib_probe label and the error return --
 * confirm against the complete file.
 */
703 int register_ibdev ( struct ib_device *ibdev ) {
706 /* Add to device list */
708 list_add_tail ( &ibdev->list, &ib_devices );
710 /* Add IPoIB device */
711 if ( ( rc = ipoib_probe ( ibdev ) ) != 0 ) {
712 DBGC ( ibdev, "IBDEV %p could not add IPoIB device: %s\n",
713 ibdev, strerror ( rc ) );
714 goto err_ipoib_probe;
717 DBGC ( ibdev, "IBDEV %p registered (phys %s)\n", ibdev,
722 list_del ( &ibdev->list );
728 * Unregister Infiniband device
730 * @v ibdev Infiniband device
/*
 * unregister_ibdev: reverse of register_ibdev().  The IPoIB device is
 * removed first with ipoib_remove(), then the device is unlinked from the
 * device list and the event is logged.
 *
 * NOTE(review): lines 733-734, 736 and 739 are missing from this listing,
 * so any additional step between the visible calls cannot be seen --
 * confirm against the complete file.
 */
732 void unregister_ibdev ( struct ib_device *ibdev ) {
735 ipoib_remove ( ibdev );
737 /* Remove from device list */
738 list_del ( &ibdev->list );
740 DBGC ( ibdev, "IBDEV %p unregistered\n", ibdev );