2 * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or any later version.
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26 #include <gpxe/if_arp.h>
27 #include <gpxe/iobuf.h>
28 #include <gpxe/netdevice.h>
29 #include <gpxe/infiniband.h>
30 #include <gpxe/ipoib.h>
/*
 * Externally-defined placeholder values (defined outside this file).
 * hack_ipoib_qkey is read by ipoib_probe(); hack_ipoib_bcast_av is
 * referenced by ipoib_transmit().
 */
extern unsigned long hack_ipoib_qkey;
extern struct ib_address_vector hack_ipoib_bcast_av;

/** IPoIB MTU (size of each receive buffer allocated by ipoib_refill_recv()) */
#define IPOIB_MTU 2048

/** Number of IPoIB data send work queue entries */
#define IPOIB_DATA_NUM_SEND_WQES 2

/** Number of IPoIB data receive work queue entries */
#define IPOIB_DATA_NUM_RECV_WQES 2

/** Number of IPoIB data completion entries */
#define IPOIB_DATA_NUM_CQES 32

/** Number of IPoIB metadata send work queue entries */
#define IPOIB_META_NUM_SEND_WQES 2

/** Number of IPoIB metadata receive work queue entries */
#define IPOIB_META_NUM_RECV_WQES 2

/** Number of IPoIB metadata completion entries */
#define IPOIB_META_NUM_CQES 32
/**
 * An IPoIB queue set
 *
 * Groups one completion queue with the queue pair that uses it, plus
 * the bookkeeping used by ipoib_refill_recv() to keep the receive
 * work queue topped up.
 */
struct ipoib_queue_set {
	/** Completion queue */
	struct ib_completion_queue *cq;
	/** Queue pair */
	struct ib_queue_pair *qp;
	/** Receive work queue fill level */
	unsigned int recv_fill;
	/** Receive work queue maximum fill level */
	unsigned int recv_max_fill;
};
79 /** An IPoIB device */
82 struct net_device *netdev;
83 /** Underlying Infiniband device */
84 struct ib_device *ibdev;
86 struct ipoib_queue_set data;
88 struct ipoib_queue_set meta;
90 struct ib_gid broadcast_gid;
92 unsigned int broadcast_lid;
93 /** Joined to broadcast group */
96 unsigned long data_qkey;
100 * IPoIB path cache entry
102 * This serves a similar role to the ARP cache for Ethernet. (ARP
103 * *is* used on IPoIB; we have two caches to maintain.)
105 struct ipoib_cached_path {
106 /** Destination GID */
108 /** Destination LID */
116 /** Number of IPoIB path cache entries */
117 #define IPOIB_NUM_CACHED_PATHS 2
119 /** IPoIB path cache */
120 static struct ipoib_cached_path ipoib_path_cache[IPOIB_NUM_CACHED_PATHS];
122 /** Oldest IPoIB path cache entry index */
123 static unsigned int ipoib_path_cache_idx = 0;
125 /** TID half used to identify get path record replies */
126 #define IPOIB_TID_GET_PATH_REC 0x11111111UL
128 /** TID half used to identify multicast member record replies */
129 #define IPOIB_TID_MC_MEMBER_REC 0x22222222UL
131 /** IPoIB metadata TID */
132 static uint32_t ipoib_meta_tid = 0;
134 /** IPv4 broadcast GID */
135 static const struct ib_gid ipv4_broadcast_gid = {
136 { { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
137 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff } }
140 /** Maximum time we will wait for the broadcast join to succeed */
141 #define IPOIB_JOIN_MAX_DELAY_MS 1000
143 /****************************************************************************
147 ****************************************************************************
150 /** Broadcast QPN used in IPoIB MAC addresses
152 * This is a guaranteed invalid real QPN
154 #define IPOIB_BROADCAST_QPN 0xffffffffUL
156 /** Broadcast IPoIB address */
157 static struct ipoib_mac ipoib_broadcast = {
158 .qpn = ntohl ( IPOIB_BROADCAST_QPN ),
162 * Transmit IPoIB packet
164 * @v iobuf I/O buffer
165 * @v netdev Network device
166 * @v net_protocol Network-layer protocol
167 * @v ll_dest Link-layer destination address
169 * Prepends the IPoIB link-layer header and transmits the packet.
171 static int ipoib_tx ( struct io_buffer *iobuf, struct net_device *netdev,
172 struct net_protocol *net_protocol,
173 const void *ll_dest ) {
174 struct ipoib_hdr *ipoib_hdr =
175 iob_push ( iobuf, sizeof ( *ipoib_hdr ) );
177 /* Build IPoIB header */
178 memcpy ( &ipoib_hdr->pseudo.peer, ll_dest,
179 sizeof ( ipoib_hdr->pseudo.peer ) );
180 ipoib_hdr->real.proto = net_protocol->net_proto;
181 ipoib_hdr->real.reserved = 0;
183 /* Hand off to network device */
184 return netdev_tx ( netdev, iobuf );
188 * Process received IPoIB packet
190 * @v iobuf I/O buffer
191 * @v netdev Network device
193 * Strips off the IPoIB link-layer header and passes up to the
194 * network-layer protocol.
196 static int ipoib_rx ( struct io_buffer *iobuf, struct net_device *netdev ) {
197 struct ipoib_hdr *ipoib_hdr = iobuf->data;
200 if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) {
201 DBG ( "IPoIB packet too short for link-layer header\n" );
202 DBG_HD ( iobuf->data, iob_len ( iobuf ) );
207 /* Strip off IPoIB header */
208 int len = iob_len ( iobuf );
209 DBG ( "WTF iob_len = %zd\n", len );
211 DBG_HD ( iobuf, sizeof ( *iobuf ) );
216 iob_pull ( iobuf, sizeof ( *ipoib_hdr ) );
218 /* Hand off to network-layer protocol */
219 return net_rx ( iobuf, netdev, ipoib_hdr->real.proto,
220 &ipoib_hdr->pseudo.peer );
224 * Transcribe IPoIB address
226 * @v ll_addr Link-layer address
227 * @ret string Link-layer address in human-readable format
229 const char * ipoib_ntoa ( const void *ll_addr ) {
231 const struct ipoib_mac *mac = ll_addr;
233 snprintf ( buf, sizeof ( buf ), "%08lx:%08lx:%08lx:%08lx:%08lx",
234 htonl ( mac->qpn ), htonl ( mac->gid.u.dwords[0] ),
235 htonl ( mac->gid.u.dwords[1] ),
236 htonl ( mac->gid.u.dwords[2] ),
237 htonl ( mac->gid.u.dwords[3] ) );
/* Link-layer protocol descriptor for IPoIB.
 *
 * NOTE(review): this initializer is incomplete as received - any
 * members after .ll_broadcast and the closing brace are absent.
 * ipoib_tx(), ipoib_rx() and ipoib_ntoa() are presumably wired in
 * here; confirm against the struct ll_protocol definition.
 */
241 /** IPoIB protocol */
242 struct ll_protocol ipoib_protocol __ll_protocol = {
/* Link-layer protocol ID, converted with htons() */
244 .ll_proto = htons ( ARPHRD_INFINIBAND ),
/* Address and header lengths come from IPOIB_ALEN / IPOIB_HLEN */
245 .ll_addr_len = IPOIB_ALEN,
246 .ll_header_len = IPOIB_HLEN,
/* Broadcast address: the static ipoib_broadcast MAC viewed as raw bytes */
247 .ll_broadcast = ( uint8_t * ) &ipoib_broadcast,
253 /****************************************************************************
255 * IPoIB network device
257 ****************************************************************************
263 * @v ipoib IPoIB device
266 static void ipoib_destroy_qset ( struct ipoib_device *ipoib,
267 struct ipoib_queue_set *qset ) {
268 struct ib_device *ibdev = ipoib->ibdev;
271 ib_destroy_qp ( ibdev, qset->qp );
273 ib_destroy_cq ( ibdev, qset->cq );
274 memset ( qset, 0, sizeof ( *qset ) );
280 * @v ipoib IPoIB device
282 * @ret rc Return status code
284 static int ipoib_create_qset ( struct ipoib_device *ipoib,
285 struct ipoib_queue_set *qset,
286 unsigned int num_cqes,
287 unsigned int num_send_wqes,
288 unsigned int num_recv_wqes,
289 unsigned long qkey ) {
290 struct ib_device *ibdev = ipoib->ibdev;
293 /* Store queue parameters */
294 qset->recv_max_fill = num_recv_wqes;
296 /* Allocate completion queue */
297 qset->cq = ib_create_cq ( ibdev, num_cqes );
299 DBGC ( ipoib, "IPoIB %p could not allocate completion queue\n",
305 /* Allocate queue pair */
306 qset->qp = ib_create_qp ( ibdev, num_send_wqes, qset->cq,
307 num_recv_wqes, qset->cq, qkey );
309 DBGC ( ipoib, "IPoIB %p could not allocate queue pair\n",
314 qset->qp->owner_priv = ipoib->netdev;
319 ipoib_destroy_qset ( ipoib, qset );
324 * Find path cache entry by GID
327 * @ret entry Path cache entry, or NULL
329 static struct ipoib_cached_path *
330 ipoib_find_cached_path ( struct ib_gid *gid ) {
331 struct ipoib_cached_path *path;
334 for ( i = 0 ; i < IPOIB_NUM_CACHED_PATHS ; i++ ) {
335 path = &ipoib_path_cache[i];
336 if ( memcmp ( &path->gid, gid, sizeof ( *gid ) ) == 0 )
339 DBG ( "IPoIB %08lx:%08lx:%08lx:%08lx cache miss\n",
340 htonl ( gid->u.dwords[0] ), htonl ( gid->u.dwords[1] ),
341 htonl ( gid->u.dwords[2] ), htonl ( gid->u.dwords[3] ) );
346 * Transmit path record request
348 * @v ipoib IPoIB device
349 * @v gid Destination GID
350 * @ret rc Return status code
352 static int ipoib_get_path_record ( struct ipoib_device *ipoib,
353 struct ib_gid *gid ) {
354 struct ib_device *ibdev = ipoib->ibdev;
355 struct io_buffer *iobuf;
356 struct ib_mad_path_record *path_record;
357 struct ib_address_vector av;
360 /* Allocate I/O buffer */
361 iobuf = alloc_iob ( sizeof ( *path_record ) );
364 iob_put ( iobuf, sizeof ( *path_record ) );
365 path_record = iobuf->data;
366 memset ( path_record, 0, sizeof ( *path_record ) );
368 /* Construct path record request */
369 path_record->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
370 path_record->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
371 path_record->mad_hdr.class_version = 2;
372 path_record->mad_hdr.method = IB_MGMT_METHOD_GET;
373 path_record->mad_hdr.attr_id = htons ( IB_SA_ATTR_PATH_REC );
374 path_record->mad_hdr.tid[0] = IPOIB_TID_GET_PATH_REC;
375 path_record->mad_hdr.tid[1] = ipoib_meta_tid++;
376 path_record->sa_hdr.comp_mask[1] =
377 htonl ( IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID );
378 memcpy ( &path_record->dgid, gid, sizeof ( path_record->dgid ) );
379 memcpy ( &path_record->sgid, &ibdev->port_gid,
380 sizeof ( path_record->sgid ) );
382 /* Construct address vector */
383 memset ( &av, 0, sizeof ( av ) );
384 av.dlid = ibdev->sm_lid;
385 av.dest_qp = IB_SA_QPN;
386 av.qkey = IB_GLOBAL_QKEY;
388 /* Post send request */
389 if ( ( rc = ib_post_send ( ibdev, ipoib->meta.qp, &av,
391 DBGC ( ipoib, "IPoIB %p could not send get path record: %s\n",
392 ipoib, strerror ( rc ) );
401 * Transmit multicast group membership request
403 * @v ipoib IPoIB device
404 * @v gid Multicast GID
405 * @v join Join (rather than leave) group
406 * @ret rc Return status code
408 static int ipoib_mc_member_record ( struct ipoib_device *ipoib,
409 struct ib_gid *gid, int join ) {
410 struct ib_device *ibdev = ipoib->ibdev;
411 struct io_buffer *iobuf;
412 struct ib_mad_mc_member_record *mc_member_record;
413 struct ib_address_vector av;
416 /* Allocate I/O buffer */
417 iobuf = alloc_iob ( sizeof ( *mc_member_record ) );
420 iob_put ( iobuf, sizeof ( *mc_member_record ) );
421 mc_member_record = iobuf->data;
422 memset ( mc_member_record, 0, sizeof ( *mc_member_record ) );
424 /* Construct path record request */
425 mc_member_record->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
426 mc_member_record->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
427 mc_member_record->mad_hdr.class_version = 2;
428 mc_member_record->mad_hdr.method =
429 ( join ? IB_MGMT_METHOD_SET : IB_MGMT_METHOD_DELETE );
430 mc_member_record->mad_hdr.attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC );
431 mc_member_record->mad_hdr.tid[0] = IPOIB_TID_MC_MEMBER_REC;
432 mc_member_record->mad_hdr.tid[1] = ipoib_meta_tid++;
433 mc_member_record->sa_hdr.comp_mask[1] =
434 htonl ( IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
435 IB_SA_MCMEMBER_REC_JOIN_STATE );
436 mc_member_record->scope__join_state = 1;
437 memcpy ( &mc_member_record->mgid, gid,
438 sizeof ( mc_member_record->mgid ) );
439 memcpy ( &mc_member_record->port_gid, &ibdev->port_gid,
440 sizeof ( mc_member_record->port_gid ) );
442 /* Construct address vector */
443 memset ( &av, 0, sizeof ( av ) );
444 av.dlid = ibdev->sm_lid;
445 av.dest_qp = IB_SA_QPN;
446 av.qkey = IB_GLOBAL_QKEY;
448 /* Post send request */
449 if ( ( rc = ib_post_send ( ibdev, ipoib->meta.qp, &av,
451 DBGC ( ipoib, "IPoIB %p could not send get path record: %s\n",
452 ipoib, strerror ( rc ) );
/* Network device transmit hook.
 *
 * Removes the IPoIB pseudo-header that ipoib_tx() prepended, uses the
 * peer address it carried to build an Infiniband address vector, and
 * posts the remaining packet on the data queue pair.
 *
 * NOTE(review): several lines of this function are absent from the
 * source as received (declarations of "gid" and "rc", branch exits,
 * the else/if lines); the comments below describe only what is visible.
 */
461 * Transmit packet via IPoIB network device
463 * @v netdev Network device
464 * @v iobuf I/O buffer
465 * @ret rc Return status code
467 static int ipoib_transmit ( struct net_device *netdev,
468 struct io_buffer *iobuf ) {
469 struct ipoib_device *ipoib = netdev->priv;
470 struct ib_device *ibdev = ipoib->ibdev;
471 struct ipoib_pseudo_hdr *ipoib_pshdr = iobuf->data;
472 struct ib_address_vector av;
474 struct ipoib_cached_path *path;
/* Reject buffers too short to hold the pseudo-header */
478 if ( iob_len ( iobuf ) < sizeof ( *ipoib_pshdr ) ) {
479 DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib );
/* Strip the pseudo-header; ipoib_pshdr still points at the stripped
 * bytes, which are read below.
 */
482 iob_pull ( iobuf, ( sizeof ( *ipoib_pshdr ) ) );
484 /* Construct address vector */
485 memset ( &av, 0, sizeof ( av ) );
486 av.qkey = IB_GLOBAL_QKEY;
/* The peer QPN is compared in network byte order */
488 if ( ipoib_pshdr->peer.qpn == htonl ( IPOIB_BROADCAST_QPN ) ) {
489 /* Broadcast address */
/* NOTE(review): as visible, this memcpy overwrites the whole of av,
 * including the qkey set above.  The lines on either side of it are
 * missing - possibly it was conditionally compiled; confirm.
 */
491 memcpy ( &av, &hack_ipoib_bcast_av, sizeof ( av ) );
493 av.dest_qp = IB_BROADCAST_QPN;
494 av.dlid = ipoib->broadcast_lid;
495 gid = &ipoib->broadcast_gid;
497 /* Unicast - look in path cache */
498 path = ipoib_find_cached_path ( &ipoib_pshdr->peer.gid );
500 /* No path entry - get path record */
501 rc = ipoib_get_path_record ( ipoib,
502 &ipoib_pshdr->peer.gid );
/* On a cache miss the packet is not sent: only the path record request
 * is issued, and the buffer is handed back via netdev_tx_complete().
 */
503 netdev_tx_complete ( netdev, iobuf );
/* Cache hit: address the peer's QPN (converted to host order) using
 * the cached LID and rate.
 */
506 av.dest_qp = ntohl ( ipoib_pshdr->peer.qpn );
507 av.dlid = path->dlid;
508 av.rate = path->rate;
510 gid = &ipoib_pshdr->peer.gid;
/* Copy the chosen destination GID (broadcast or peer) into av */
512 memcpy ( &av.gid, gid, sizeof ( av.gid ) );
514 return ib_post_send ( ibdev, ipoib->data.qp, &av, iobuf );
518 * Handle IPoIB data send completion
520 * @v ibdev Infiniband device
522 * @v completion Completion
523 * @v iobuf I/O buffer
525 static void ipoib_data_complete_send ( struct ib_device *ibdev __unused,
526 struct ib_queue_pair *qp,
527 struct ib_completion *completion,
528 struct io_buffer *iobuf ) {
529 struct net_device *netdev = qp->owner_priv;
531 netdev_tx_complete_err ( netdev, iobuf,
532 ( completion->syndrome ? -EIO : 0 ) );
/* Data queue receive completion handler.
 *
 * Validates the received buffer, strips the global route header,
 * prepends an IPoIB pseudo-header, passes the packet to the network
 * device and decrements the data receive fill level.
 *
 * NOTE(review): the exits from the error branches and at least one
 * statement after the iob_push() are absent from the source as
 * received, so it cannot be told from here whether the error branches
 * also reach the final recv_fill decrement.
 */
536 * Handle IPoIB data receive completion
538 * @v ibdev Infiniband device
540 * @v completion Completion
541 * @v iobuf I/O buffer
543 static void ipoib_data_complete_recv ( struct ib_device *ibdev __unused,
544 struct ib_queue_pair *qp,
545 struct ib_completion *completion,
546 struct io_buffer *iobuf ) {
/* owner_priv was set to the net device by ipoib_create_qset() */
547 struct net_device *netdev = qp->owner_priv;
548 struct ipoib_device *ipoib = netdev->priv;
549 struct ipoib_pseudo_hdr *ipoib_pshdr;
/* A non-zero syndrome is reported to the net device as -EIO */
551 if ( completion->syndrome ) {
552 netdev_rx_err ( netdev, iobuf, -EIO );
/* Set the buffer's data length to the completion length */
556 iob_put ( iobuf, completion->len );
557 if ( iob_len ( iobuf ) < sizeof ( struct ib_global_route_header ) ) {
558 DBGC ( ipoib, "IPoIB %p received data packet too short to "
559 "contain GRH\n", ipoib );
560 DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
561 netdev_rx_err ( netdev, iobuf, -EIO );
/* Discard the global route header at the front of the buffer */
564 iob_pull ( iobuf, sizeof ( struct ib_global_route_header ) );
566 if ( iob_len ( iobuf ) < sizeof ( struct ipoib_real_hdr ) ) {
567 DBGC ( ipoib, "IPoIB %p received data packet too short to "
568 "contain IPoIB header\n", ipoib );
569 DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
570 netdev_rx_err ( netdev, iobuf, -EIO );
/* Prepend room for the pseudo-header in front of the real header.
 * No visible line fills it in (see FIXME).
 */
574 ipoib_pshdr = iob_push ( iobuf, sizeof ( *ipoib_pshdr ) );
575 /* FIXME: fill in a MAC address for the sake of AoE! */
577 netdev_rx ( netdev, iobuf );
/* One fewer receive buffer outstanding; ipoib_refill_recv() tops up */
580 ipoib->data.recv_fill--;
584 * Handle IPoIB metadata send completion
586 * @v ibdev Infiniband device
588 * @v completion Completion
589 * @v iobuf I/O buffer
591 static void ipoib_meta_complete_send ( struct ib_device *ibdev __unused,
592 struct ib_queue_pair *qp,
593 struct ib_completion *completion,
594 struct io_buffer *iobuf ) {
595 struct net_device *netdev = qp->owner_priv;
596 struct ipoib_device *ipoib = netdev->priv;
598 if ( completion->syndrome ) {
599 DBGC ( ipoib, "IPoIB %p metadata TX completion error %x\n",
600 ipoib, completion->syndrome );
606 * Handle received IPoIB path record
608 * @v ipoib IPoIB device
609 * @v path_record Path record
611 static void ipoib_recv_path_record ( struct ipoib_device *ipoib __unused,
612 struct ib_mad_path_record *path_record ) {
613 struct ipoib_cached_path *path;
615 /* Update path cache entry */
616 path = &ipoib_path_cache[ipoib_path_cache_idx];
617 memcpy ( &path->gid, &path_record->dgid, sizeof ( path->gid ) );
618 path->dlid = ntohs ( path_record->dlid );
619 path->sl = ( path_record->reserved__sl & 0x0f );
620 path->rate = ( path_record->rate_selector__rate & 0x3f );
622 DBG ( "IPoIB %08lx:%08lx:%08lx:%08lx dlid %x sl %x rate %x\n",
623 htonl ( path->gid.u.dwords[0] ), htonl ( path->gid.u.dwords[1] ),
624 htonl ( path->gid.u.dwords[2] ), htonl ( path->gid.u.dwords[3] ),
625 path->dlid, path->sl, path->rate );
627 /* Update path cache index */
628 ipoib_path_cache_idx++;
629 if ( ipoib_path_cache_idx == IPOIB_NUM_CACHED_PATHS )
630 ipoib_path_cache_idx = 0;
634 * Handle received IPoIB multicast membership record
636 * @v ipoib IPoIB device
637 * @v mc_member_record Multicast membership record
639 static void ipoib_recv_mc_member_record ( struct ipoib_device *ipoib,
640 struct ib_mad_mc_member_record *mc_member_record ) {
641 /* Record parameters */
642 ipoib->broadcast_joined =
643 ( mc_member_record->scope__join_state & 0x0f );
644 ipoib->data_qkey = ntohl ( mc_member_record->qkey );
645 ipoib->broadcast_lid = ntohs ( mc_member_record->mlid );
646 DBGC ( ipoib, "IPoIB %p %s broadcast group: qkey %lx mlid %x\n",
647 ipoib, ( ipoib->broadcast_joined ? "joined" : "left" ),
648 ipoib->data_qkey, ipoib->broadcast_lid );
/* Metadata queue receive completion handler.
 *
 * Validates the received buffer, strips the global route header, and
 * dispatches the reply on the first half of its transaction ID to
 * ipoib_recv_path_record() or ipoib_recv_mc_member_record().  Finally
 * decrements the metadata receive fill level.
 *
 * NOTE(review): the declaration and assignment of "mad", the exits
 * from the error branches, the break/default lines of the switch and
 * the release of iobuf are absent from the source as received.
 */
652 * Handle IPoIB metadata receive completion
654 * @v ibdev Infiniband device
656 * @v completion Completion
657 * @v iobuf I/O buffer
659 static void ipoib_meta_complete_recv ( struct ib_device *ibdev __unused,
660 struct ib_queue_pair *qp,
661 struct ib_completion *completion,
662 struct io_buffer *iobuf ) {
/* owner_priv was set to the net device by ipoib_create_qset() */
663 struct net_device *netdev = qp->owner_priv;
664 struct ipoib_device *ipoib = netdev->priv;
/* A non-zero syndrome is logged */
667 if ( completion->syndrome ) {
668 DBGC ( ipoib, "IPoIB %p metadata RX completion error %x\n",
669 ipoib, completion->syndrome );
/* Set the buffer's data length to the completion length */
673 iob_put ( iobuf, completion->len );
674 if ( iob_len ( iobuf ) < sizeof ( struct ib_global_route_header ) ) {
675 DBGC ( ipoib, "IPoIB %p received metadata packet too short "
676 "to contain GRH\n", ipoib );
677 DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
/* Discard the global route header; the reply is expected to follow */
680 iob_pull ( iobuf, sizeof ( struct ib_global_route_header ) );
681 if ( iob_len ( iobuf ) < sizeof ( *mad ) ) {
682 DBGC ( ipoib, "IPoIB %p received metadata packet too short "
683 "to contain reply\n", ipoib );
684 DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
/* A non-zero status in the reply header is logged (converted with
 * ntohs() for display)
 */
689 if ( mad->mad_hdr.status != 0 ) {
690 DBGC ( ipoib, "IPoIB %p metadata RX err status %04x\n",
691 ipoib, ntohs ( mad->mad_hdr.status ) );
/* tid[0] is compared without byte-swapping, matching how the request
 * functions store it
 */
695 switch ( mad->mad_hdr.tid[0] ) {
696 case IPOIB_TID_GET_PATH_REC:
697 ipoib_recv_path_record ( ipoib, &mad->path_record );
699 case IPOIB_TID_MC_MEMBER_REC:
700 ipoib_recv_mc_member_record ( ipoib, &mad->mc_member_record );
703 DBGC ( ipoib, "IPoIB %p unwanted response:\n",
705 DBGC_HD ( ipoib, mad, sizeof ( *mad ) );
/* One fewer receive buffer outstanding; ipoib_refill_recv() tops up */
710 ipoib->meta.recv_fill--;
715 * Refill IPoIB receive ring
717 * @v ipoib IPoIB device
719 static void ipoib_refill_recv ( struct ipoib_device *ipoib,
720 struct ipoib_queue_set *qset ) {
721 struct ib_device *ibdev = ipoib->ibdev;
722 struct io_buffer *iobuf;
725 while ( qset->recv_fill < qset->recv_max_fill ) {
726 iobuf = alloc_iob ( IPOIB_MTU );
729 if ( ( rc = ib_post_recv ( ibdev, qset->qp, iobuf ) ) != 0 ) {
738 * Poll IPoIB network device
740 * @v netdev Network device
742 static void ipoib_poll ( struct net_device *netdev ) {
743 struct ipoib_device *ipoib = netdev->priv;
744 struct ib_device *ibdev = ipoib->ibdev;
746 ib_poll_cq ( ibdev, ipoib->meta.cq, ipoib_meta_complete_send,
747 ipoib_meta_complete_recv );
748 ib_poll_cq ( ibdev, ipoib->data.cq, ipoib_data_complete_send,
749 ipoib_data_complete_recv );
750 ipoib_refill_recv ( ipoib, &ipoib->meta );
751 ipoib_refill_recv ( ipoib, &ipoib->data );
755 * Enable/disable interrupts on IPoIB network device
757 * @v netdev Network device
758 * @v enable Interrupts should be enabled
760 static void ipoib_irq ( struct net_device *netdev __unused,
761 int enable __unused ) {
762 /* No implementation */
766 * Open IPoIB network device
768 * @v netdev Network device
769 * @ret rc Return status code
771 static int ipoib_open ( struct net_device *netdev ) {
772 struct ipoib_device *ipoib = netdev->priv;
773 struct ib_device *ibdev = ipoib->ibdev;
776 /* Attach to broadcast multicast GID */
777 if ( ( rc = ib_mcast_attach ( ibdev, ipoib->data.qp,
778 &ipoib->broadcast_gid ) ) != 0 ) {
779 DBG ( "Could not attach to broadcast GID: %s\n",
784 /* Fill receive rings */
785 ipoib_refill_recv ( ipoib, &ipoib->meta );
786 ipoib_refill_recv ( ipoib, &ipoib->data );
792 * Close IPoIB network device
794 * @v netdev Network device
796 static void ipoib_close ( struct net_device *netdev ) {
797 struct ipoib_device *ipoib = netdev->priv;
798 struct ib_device *ibdev = ipoib->ibdev;
800 /* Detach from broadcast multicast GID */
801 ib_mcast_detach ( ibdev, ipoib->data.qp, &ipoib_broadcast.gid );
803 /* FIXME: should probably flush the receive ring */
/* Operations table passed to netdev_init() by ipoib_probe().
 *
 * NOTE(review): this initializer is incomplete as received - only the
 * .close and .transmit members are visible, and the closing brace is
 * absent.  ipoib_open(), ipoib_poll() and ipoib_irq() are presumably
 * wired in on the missing lines; confirm against the struct
 * net_device_operations definition.
 */
806 /** IPoIB network device operations */
807 static struct net_device_operations ipoib_operations = {
809 .close = ipoib_close,
810 .transmit = ipoib_transmit,
/* Sends a join request for the device's broadcast GID on the metadata
 * queue, then repeatedly polls the metadata completion queue (at most
 * IPOIB_JOIN_MAX_DELAY_MS iterations) until
 * ipoib_recv_mc_member_record() has set ipoib->broadcast_joined.  If
 * the loop runs out, a timeout is logged.
 *
 * NOTE(review): the declaration of "rc", the final argument of the
 * ipoib_mc_member_record() call, any per-iteration delay, and the
 * return statements are absent from the source as received.  The
 * counter name suggests a one-millisecond delay per iteration -
 * confirm.
 */
816 * Join IPoIB broadcast group
818 * @v ipoib IPoIB device
819 * @ret rc Return status code
821 static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
822 struct ib_device *ibdev = ipoib->ibdev;
823 unsigned int delay_ms;
826 /* Make sure we have some receive descriptors */
827 ipoib_refill_recv ( ipoib, &ipoib->meta );
829 /* Send join request */
830 if ( ( rc = ipoib_mc_member_record ( ipoib, &ipoib->broadcast_gid,
832 DBGC ( ipoib, "IPoIB %p could not send broadcast join: %s\n",
833 ipoib, strerror ( rc ) );
837 /* Wait for join to complete. Ideally we wouldn't delay for
838 * this long, but we need the queue key before we can set up
839 * the data queue pair, which we need before we can know the
842 for ( delay_ms = IPOIB_JOIN_MAX_DELAY_MS ; delay_ms ; delay_ms-- ) {
844 ib_poll_cq ( ibdev, ipoib->meta.cq, ipoib_meta_complete_send,
845 ipoib_meta_complete_recv );
846 ipoib_refill_recv ( ipoib, &ipoib->meta );
847 if ( ipoib->broadcast_joined )
850 DBGC ( ipoib, "IPoIB %p timed out waiting for broadcast join\n",
859 * @v ibdev Infiniband device
860 * @ret rc Return status code
862 int ipoib_probe ( struct ib_device *ibdev ) {
863 struct net_device *netdev;
864 struct ipoib_device *ipoib;
865 struct ipoib_mac *mac;
868 /* Allocate network device */
/* Probe sequence: allocate the net device, create the metadata queue
 * set, join the broadcast group, create the data queue set with the
 * resulting data queue key, build the MAC address, register the net
 * device.  Failures unwind through the labels at the end.
 *
 * NOTE(review): the declaration of "rc", the allocation-failure check,
 * the num_cqes arguments of both ipoib_create_qset() calls, the
 * success return and the final "return rc" are absent from the source
 * as received.
 */
869 netdev = alloc_ipoibdev ( sizeof ( *ipoib ) );
872 netdev_init ( netdev, &ipoib_operations );
873 ipoib = netdev->priv;
/* Lets ipoib_remove() find the net device again from the ib_device */
874 ib_set_ownerdata ( ibdev, netdev );
875 netdev->dev = ibdev->dev;
876 memset ( ipoib, 0, sizeof ( *ipoib ) );
877 ipoib->netdev = netdev;
878 ipoib->ibdev = ibdev;
880 /* Calculate broadcast GID */
881 memcpy ( &ipoib->broadcast_gid, &ipv4_broadcast_gid,
882 sizeof ( ipoib->broadcast_gid ) );
/* Overwrite one 16-bit word of the template GID with the port's
 * partition key
 */
883 ipoib->broadcast_gid.u.words[2] = htons ( ibdev->pkey );
885 /* Allocate metadata queue set */
886 if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->meta,
888 IPOIB_META_NUM_SEND_WQES,
889 IPOIB_META_NUM_RECV_WQES,
890 IB_GLOBAL_QKEY ) ) != 0 ) {
891 DBGC ( ipoib, "IPoIB %p could not allocate metadata QP: %s\n",
892 ipoib, strerror ( rc ) );
893 goto err_create_meta_qset;
/* NOTE(review): data_qkey is also written by
 * ipoib_recv_mc_member_record() during the join below, which would
 * supersede this value.  The lines on either side of this assignment
 * are missing - possibly it was conditionally compiled; confirm.
 */
897 ipoib->data_qkey = hack_ipoib_qkey;
900 /* Join broadcast group */
901 if ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) {
902 DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n",
903 ipoib, strerror ( rc ) );
904 goto err_join_broadcast_group;
907 /* Allocate data queue set */
908 if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->data,
910 IPOIB_DATA_NUM_SEND_WQES,
911 IPOIB_DATA_NUM_RECV_WQES,
912 ipoib->data_qkey ) ) != 0 ) {
913 DBGC ( ipoib, "IPoIB %p could not allocate data QP: %s\n",
914 ipoib, strerror ( rc ) );
915 goto err_create_data_qset;
918 /* Construct MAC address */
/* MAC = data QPN (network byte order) followed by the port GID */
919 mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
920 mac->qpn = htonl ( ipoib->data.qp->qpn );
921 memcpy ( &mac->gid, &ibdev->port_gid, sizeof ( mac->gid ) );
923 /* Register network device */
924 if ( ( rc = register_netdev ( netdev ) ) != 0 )
925 goto err_register_netdev;
/* Error unwinding, in reverse order of construction.  A failed join or
 * failed data queue set creation only has the metadata queue set to
 * destroy.
 */
930 ipoib_destroy_qset ( ipoib, &ipoib->data );
931 err_join_broadcast_group:
932 err_create_data_qset:
933 ipoib_destroy_qset ( ipoib, &ipoib->meta );
934 err_create_meta_qset:
935 netdev_nullify ( netdev );
936 netdev_put ( netdev );
941 * Remove IPoIB device
943 * @v ibdev Infiniband device
945 void ipoib_remove ( struct ib_device *ibdev ) {
946 struct net_device *netdev = ib_get_ownerdata ( ibdev );
947 struct ipoib_device *ipoib = netdev->priv;
949 unregister_netdev ( netdev );
950 ipoib_destroy_qset ( ipoib, &ipoib->data );
951 ipoib_destroy_qset ( ipoib, &ipoib->meta );
952 netdev_nullify ( netdev );
953 netdev_put ( netdev );