/*
 * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <gpxe/if_arp.h>
#include <gpxe/iobuf.h>
#include <gpxe/netdevice.h>
#include <gpxe/infiniband.h>
#include <gpxe/ipoib.h>
/** IPoIB MTU (also the size of each receive buffer allocated) */
#define IPOIB_MTU 2048

/** Number of IPoIB data send work queue entries */
#define IPOIB_DATA_NUM_SEND_WQES 2

/** Number of IPoIB data receive work queue entries */
#define IPOIB_DATA_NUM_RECV_WQES 4

/** Number of IPoIB data completion entries */
#define IPOIB_DATA_NUM_CQES 8

/** Number of IPoIB metadata send work queue entries */
#define IPOIB_META_NUM_SEND_WQES 2

/** Number of IPoIB metadata receive work queue entries */
#define IPOIB_META_NUM_RECV_WQES 2

/** Number of IPoIB metadata completion entries */
#define IPOIB_META_NUM_CQES 8
/**
 * An IPoIB queue set
 *
 * Groups one completion queue with the queue pair that uses it, plus
 * the bookkeeping needed to keep the receive ring topped up.
 */
struct ipoib_queue_set {
	/** Completion queue */
	struct ib_completion_queue *cq;
	/** Queue pair */
	struct ib_queue_pair *qp;
	/** Receive work queue fill level */
	unsigned int recv_fill;
	/** Receive work queue maximum fill level */
	unsigned int recv_max_fill;
};
69 /** An IPoIB device */
72 struct net_device *netdev;
73 /** Underlying Infiniband device */
74 struct ib_device *ibdev;
76 struct ipoib_queue_set data;
78 struct ipoib_queue_set meta;
80 struct ib_gid broadcast_gid;
82 unsigned int broadcast_lid;
84 unsigned long data_qkey;
85 /** Attached to multicast group
87 * This flag indicates whether or not we have attached our
88 * data queue pair to the broadcast multicast GID.
90 int broadcast_attached;
94 * IPoIB path cache entry
96 * This serves a similar role to the ARP cache for Ethernet. (ARP
97 * *is* used on IPoIB; we have two caches to maintain.)
99 struct ipoib_cached_path {
100 /** Destination GID */
102 /** Destination LID */
110 /** Number of IPoIB path cache entries */
111 #define IPOIB_NUM_CACHED_PATHS 2
113 /** IPoIB path cache */
114 static struct ipoib_cached_path ipoib_path_cache[IPOIB_NUM_CACHED_PATHS];
116 /** Oldest IPoIB path cache entry index */
117 static unsigned int ipoib_path_cache_idx = 0;
/** TID half used to identify get path record replies */
#define IPOIB_TID_GET_PATH_REC 0x11111111UL

/** TID half used to identify multicast member record replies */
#define IPOIB_TID_MC_MEMBER_REC 0x22222222UL

/** IPoIB metadata TID
 *
 * Incremented for every metadata request sent; stored in the second
 * half of the MAD transaction ID.
 */
static uint32_t ipoib_meta_tid = 0;
128 /** IPv4 broadcast GID */
129 static const struct ib_gid ipv4_broadcast_gid = {
130 { { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
131 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff } }
/** Maximum time we will wait for the broadcast join to succeed */
#define IPOIB_JOIN_MAX_DELAY_MS 1000
/****************************************************************************
 *
 * IPoIB link layer
 *
 ****************************************************************************
 */
/** Broadcast QPN used in IPoIB MAC addresses
 *
 * This is a guaranteed invalid real QPN
 */
#define IPOIB_BROADCAST_QPN 0xffffffffUL
150 /** Broadcast IPoIB address */
151 static struct ipoib_mac ipoib_broadcast = {
152 .qpn = ntohl ( IPOIB_BROADCAST_QPN ),
156 * Transmit IPoIB packet
158 * @v iobuf I/O buffer
159 * @v netdev Network device
160 * @v net_protocol Network-layer protocol
161 * @v ll_dest Link-layer destination address
163 * Prepends the IPoIB link-layer header and transmits the packet.
165 static int ipoib_tx ( struct io_buffer *iobuf, struct net_device *netdev,
166 struct net_protocol *net_protocol,
167 const void *ll_dest ) {
168 struct ipoib_hdr *ipoib_hdr =
169 iob_push ( iobuf, sizeof ( *ipoib_hdr ) );
171 /* Build IPoIB header */
172 memcpy ( &ipoib_hdr->pseudo.peer, ll_dest,
173 sizeof ( ipoib_hdr->pseudo.peer ) );
174 ipoib_hdr->real.proto = net_protocol->net_proto;
175 ipoib_hdr->real.reserved = 0;
177 /* Hand off to network device */
178 return netdev_tx ( netdev, iobuf );
182 * Process received IPoIB packet
184 * @v iobuf I/O buffer
185 * @v netdev Network device
187 * Strips off the IPoIB link-layer header and passes up to the
188 * network-layer protocol.
190 static int ipoib_rx ( struct io_buffer *iobuf, struct net_device *netdev ) {
191 struct ipoib_hdr *ipoib_hdr = iobuf->data;
194 if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) {
195 DBG ( "IPoIB packet too short for link-layer header\n" );
196 DBG_HD ( iobuf->data, iob_len ( iobuf ) );
201 /* Strip off IPoIB header */
202 iob_pull ( iobuf, sizeof ( *ipoib_hdr ) );
204 /* Hand off to network-layer protocol */
205 return net_rx ( iobuf, netdev, ipoib_hdr->real.proto,
206 &ipoib_hdr->pseudo.peer );
210 * Transcribe IPoIB address
212 * @v ll_addr Link-layer address
213 * @ret string Link-layer address in human-readable format
215 const char * ipoib_ntoa ( const void *ll_addr ) {
217 const struct ipoib_mac *mac = ll_addr;
219 snprintf ( buf, sizeof ( buf ), "%08lx:%08lx:%08lx:%08lx:%08lx",
220 htonl ( mac->qpn ), htonl ( mac->gid.u.dwords[0] ),
221 htonl ( mac->gid.u.dwords[1] ),
222 htonl ( mac->gid.u.dwords[2] ),
223 htonl ( mac->gid.u.dwords[3] ) );
227 /** IPoIB protocol */
228 struct ll_protocol ipoib_protocol __ll_protocol = {
230 .ll_proto = htons ( ARPHRD_INFINIBAND ),
231 .ll_addr_len = IPOIB_ALEN,
232 .ll_header_len = IPOIB_HLEN,
233 .ll_broadcast = ( uint8_t * ) &ipoib_broadcast,
/****************************************************************************
 *
 * IPoIB network device
 *
 ****************************************************************************
 */
249 * @v ipoib IPoIB device
252 static void ipoib_destroy_qset ( struct ipoib_device *ipoib,
253 struct ipoib_queue_set *qset ) {
254 struct ib_device *ibdev = ipoib->ibdev;
257 ib_destroy_qp ( ibdev, qset->qp );
259 ib_destroy_cq ( ibdev, qset->cq );
260 memset ( qset, 0, sizeof ( *qset ) );
266 * @v ipoib IPoIB device
268 * @ret rc Return status code
270 static int ipoib_create_qset ( struct ipoib_device *ipoib,
271 struct ipoib_queue_set *qset,
272 unsigned int num_cqes,
273 unsigned int num_send_wqes,
274 unsigned int num_recv_wqes,
275 unsigned long qkey ) {
276 struct ib_device *ibdev = ipoib->ibdev;
280 assert ( qset->cq == NULL );
281 assert ( qset->qp == NULL );
283 /* Store queue parameters */
284 qset->recv_max_fill = num_recv_wqes;
286 /* Allocate completion queue */
287 qset->cq = ib_create_cq ( ibdev, num_cqes );
289 DBGC ( ipoib, "IPoIB %p could not allocate completion queue\n",
295 /* Allocate queue pair */
296 qset->qp = ib_create_qp ( ibdev, num_send_wqes, qset->cq,
297 num_recv_wqes, qset->cq, qkey );
299 DBGC ( ipoib, "IPoIB %p could not allocate queue pair\n",
304 ib_qp_set_ownerdata ( qset->qp, ipoib->netdev );
309 ipoib_destroy_qset ( ipoib, qset );
314 * Find path cache entry by GID
317 * @ret entry Path cache entry, or NULL
319 static struct ipoib_cached_path *
320 ipoib_find_cached_path ( struct ib_gid *gid ) {
321 struct ipoib_cached_path *path;
324 for ( i = 0 ; i < IPOIB_NUM_CACHED_PATHS ; i++ ) {
325 path = &ipoib_path_cache[i];
326 if ( memcmp ( &path->gid, gid, sizeof ( *gid ) ) == 0 )
329 DBG ( "IPoIB %08lx:%08lx:%08lx:%08lx cache miss\n",
330 htonl ( gid->u.dwords[0] ), htonl ( gid->u.dwords[1] ),
331 htonl ( gid->u.dwords[2] ), htonl ( gid->u.dwords[3] ) );
336 * Transmit path record request
338 * @v ipoib IPoIB device
339 * @v gid Destination GID
340 * @ret rc Return status code
342 static int ipoib_get_path_record ( struct ipoib_device *ipoib,
343 struct ib_gid *gid ) {
344 struct ib_device *ibdev = ipoib->ibdev;
345 struct io_buffer *iobuf;
346 struct ib_mad_path_record *path_record;
347 struct ib_address_vector av;
350 /* Allocate I/O buffer */
351 iobuf = alloc_iob ( sizeof ( *path_record ) );
354 iob_put ( iobuf, sizeof ( *path_record ) );
355 path_record = iobuf->data;
356 memset ( path_record, 0, sizeof ( *path_record ) );
358 /* Construct path record request */
359 path_record->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
360 path_record->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
361 path_record->mad_hdr.class_version = 2;
362 path_record->mad_hdr.method = IB_MGMT_METHOD_GET;
363 path_record->mad_hdr.attr_id = htons ( IB_SA_ATTR_PATH_REC );
364 path_record->mad_hdr.tid[0] = IPOIB_TID_GET_PATH_REC;
365 path_record->mad_hdr.tid[1] = ipoib_meta_tid++;
366 path_record->sa_hdr.comp_mask[1] =
367 htonl ( IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID );
368 memcpy ( &path_record->dgid, gid, sizeof ( path_record->dgid ) );
369 memcpy ( &path_record->sgid, &ibdev->port_gid,
370 sizeof ( path_record->sgid ) );
372 /* Construct address vector */
373 memset ( &av, 0, sizeof ( av ) );
374 av.dlid = ibdev->sm_lid;
375 av.dest_qp = IB_SA_QPN;
376 av.qkey = IB_GLOBAL_QKEY;
378 /* Post send request */
379 if ( ( rc = ib_post_send ( ibdev, ipoib->meta.qp, &av,
381 DBGC ( ipoib, "IPoIB %p could not send get path record: %s\n",
382 ipoib, strerror ( rc ) );
391 * Transmit multicast group membership request
393 * @v ipoib IPoIB device
394 * @v gid Multicast GID
395 * @v join Join (rather than leave) group
396 * @ret rc Return status code
398 static int ipoib_mc_member_record ( struct ipoib_device *ipoib,
399 struct ib_gid *gid, int join ) {
400 struct ib_device *ibdev = ipoib->ibdev;
401 struct io_buffer *iobuf;
402 struct ib_mad_mc_member_record *mc_member_record;
403 struct ib_address_vector av;
406 /* Allocate I/O buffer */
407 iobuf = alloc_iob ( sizeof ( *mc_member_record ) );
410 iob_put ( iobuf, sizeof ( *mc_member_record ) );
411 mc_member_record = iobuf->data;
412 memset ( mc_member_record, 0, sizeof ( *mc_member_record ) );
414 /* Construct path record request */
415 mc_member_record->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
416 mc_member_record->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
417 mc_member_record->mad_hdr.class_version = 2;
418 mc_member_record->mad_hdr.method =
419 ( join ? IB_MGMT_METHOD_SET : IB_MGMT_METHOD_DELETE );
420 mc_member_record->mad_hdr.attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC );
421 mc_member_record->mad_hdr.tid[0] = IPOIB_TID_MC_MEMBER_REC;
422 mc_member_record->mad_hdr.tid[1] = ipoib_meta_tid++;
423 mc_member_record->sa_hdr.comp_mask[1] =
424 htonl ( IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
425 IB_SA_MCMEMBER_REC_JOIN_STATE );
426 mc_member_record->scope__join_state = 1;
427 memcpy ( &mc_member_record->mgid, gid,
428 sizeof ( mc_member_record->mgid ) );
429 memcpy ( &mc_member_record->port_gid, &ibdev->port_gid,
430 sizeof ( mc_member_record->port_gid ) );
432 /* Construct address vector */
433 memset ( &av, 0, sizeof ( av ) );
434 av.dlid = ibdev->sm_lid;
435 av.dest_qp = IB_SA_QPN;
436 av.qkey = IB_GLOBAL_QKEY;
438 /* Post send request */
439 if ( ( rc = ib_post_send ( ibdev, ipoib->meta.qp, &av,
441 DBGC ( ipoib, "IPoIB %p could not send get path record: %s\n",
442 ipoib, strerror ( rc ) );
451 * Transmit packet via IPoIB network device
453 * @v netdev Network device
454 * @v iobuf I/O buffer
455 * @ret rc Return status code
457 static int ipoib_transmit ( struct net_device *netdev,
458 struct io_buffer *iobuf ) {
459 struct ipoib_device *ipoib = netdev->priv;
460 struct ib_device *ibdev = ipoib->ibdev;
461 struct ipoib_pseudo_hdr *ipoib_pshdr = iobuf->data;
462 struct ib_address_vector av;
464 struct ipoib_cached_path *path;
468 if ( iob_len ( iobuf ) < sizeof ( *ipoib_pshdr ) ) {
469 DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib );
472 iob_pull ( iobuf, ( sizeof ( *ipoib_pshdr ) ) );
474 /* Construct address vector */
475 memset ( &av, 0, sizeof ( av ) );
476 av.qkey = IB_GLOBAL_QKEY;
478 if ( ipoib_pshdr->peer.qpn == htonl ( IPOIB_BROADCAST_QPN ) ) {
479 /* Broadcast address */
480 av.dest_qp = IB_BROADCAST_QPN;
481 av.dlid = ipoib->broadcast_lid;
482 gid = &ipoib->broadcast_gid;
484 /* Unicast - look in path cache */
485 path = ipoib_find_cached_path ( &ipoib_pshdr->peer.gid );
487 /* No path entry - get path record */
488 rc = ipoib_get_path_record ( ipoib,
489 &ipoib_pshdr->peer.gid );
490 netdev_tx_complete ( netdev, iobuf );
493 av.dest_qp = ntohl ( ipoib_pshdr->peer.qpn );
494 av.dlid = path->dlid;
495 av.rate = path->rate;
497 gid = &ipoib_pshdr->peer.gid;
499 memcpy ( &av.gid, gid, sizeof ( av.gid ) );
501 return ib_post_send ( ibdev, ipoib->data.qp, &av, iobuf );
505 * Handle IPoIB data send completion
507 * @v ibdev Infiniband device
509 * @v completion Completion
510 * @v iobuf I/O buffer
512 static void ipoib_data_complete_send ( struct ib_device *ibdev __unused,
513 struct ib_queue_pair *qp,
514 struct ib_completion *completion,
515 struct io_buffer *iobuf ) {
516 struct net_device *netdev = ib_qp_get_ownerdata ( qp );
518 netdev_tx_complete_err ( netdev, iobuf,
519 ( completion->syndrome ? -EIO : 0 ) );
523 * Handle IPoIB data receive completion
525 * @v ibdev Infiniband device
527 * @v completion Completion
528 * @v iobuf I/O buffer
530 static void ipoib_data_complete_recv ( struct ib_device *ibdev __unused,
531 struct ib_queue_pair *qp,
532 struct ib_completion *completion,
533 struct io_buffer *iobuf ) {
534 struct net_device *netdev = ib_qp_get_ownerdata ( qp );
535 struct ipoib_device *ipoib = netdev->priv;
536 struct ipoib_pseudo_hdr *ipoib_pshdr;
538 if ( completion->syndrome ) {
539 netdev_rx_err ( netdev, iobuf, -EIO );
543 iob_put ( iobuf, completion->len );
544 if ( iob_len ( iobuf ) < sizeof ( struct ib_global_route_header ) ) {
545 DBGC ( ipoib, "IPoIB %p received data packet too short to "
546 "contain GRH\n", ipoib );
547 DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
548 netdev_rx_err ( netdev, iobuf, -EIO );
551 iob_pull ( iobuf, sizeof ( struct ib_global_route_header ) );
553 if ( iob_len ( iobuf ) < sizeof ( struct ipoib_real_hdr ) ) {
554 DBGC ( ipoib, "IPoIB %p received data packet too short to "
555 "contain IPoIB header\n", ipoib );
556 DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
557 netdev_rx_err ( netdev, iobuf, -EIO );
561 ipoib_pshdr = iob_push ( iobuf, sizeof ( *ipoib_pshdr ) );
562 /* FIXME: fill in a MAC address for the sake of AoE! */
564 netdev_rx ( netdev, iobuf );
567 ipoib->data.recv_fill--;
571 * Handle IPoIB metadata send completion
573 * @v ibdev Infiniband device
575 * @v completion Completion
576 * @v iobuf I/O buffer
578 static void ipoib_meta_complete_send ( struct ib_device *ibdev __unused,
579 struct ib_queue_pair *qp,
580 struct ib_completion *completion,
581 struct io_buffer *iobuf ) {
582 struct net_device *netdev = ib_qp_get_ownerdata ( qp );
583 struct ipoib_device *ipoib = netdev->priv;
585 if ( completion->syndrome ) {
586 DBGC ( ipoib, "IPoIB %p metadata TX completion error %x\n",
587 ipoib, completion->syndrome );
593 * Handle received IPoIB path record
595 * @v ipoib IPoIB device
596 * @v path_record Path record
598 static void ipoib_recv_path_record ( struct ipoib_device *ipoib __unused,
599 struct ib_mad_path_record *path_record ) {
600 struct ipoib_cached_path *path;
602 /* Update path cache entry */
603 path = &ipoib_path_cache[ipoib_path_cache_idx];
604 memcpy ( &path->gid, &path_record->dgid, sizeof ( path->gid ) );
605 path->dlid = ntohs ( path_record->dlid );
606 path->sl = ( path_record->reserved__sl & 0x0f );
607 path->rate = ( path_record->rate_selector__rate & 0x3f );
609 DBG ( "IPoIB %08lx:%08lx:%08lx:%08lx dlid %x sl %x rate %x\n",
610 htonl ( path->gid.u.dwords[0] ), htonl ( path->gid.u.dwords[1] ),
611 htonl ( path->gid.u.dwords[2] ), htonl ( path->gid.u.dwords[3] ),
612 path->dlid, path->sl, path->rate );
614 /* Update path cache index */
615 ipoib_path_cache_idx++;
616 if ( ipoib_path_cache_idx == IPOIB_NUM_CACHED_PATHS )
617 ipoib_path_cache_idx = 0;
621 * Handle received IPoIB multicast membership record
623 * @v ipoib IPoIB device
624 * @v mc_member_record Multicast membership record
626 static void ipoib_recv_mc_member_record ( struct ipoib_device *ipoib,
627 struct ib_mad_mc_member_record *mc_member_record ) {
631 /* Record parameters */
632 joined = ( mc_member_record->scope__join_state & 0x0f );
633 ipoib->data_qkey = ntohl ( mc_member_record->qkey );
634 ipoib->broadcast_lid = ntohs ( mc_member_record->mlid );
635 DBGC ( ipoib, "IPoIB %p %s broadcast group: qkey %lx mlid %x\n",
636 ipoib, ( joined ? "joined" : "left" ), ipoib->data_qkey,
637 ipoib->broadcast_lid );
639 /* Update data queue pair qkey */
640 if ( ( rc = ib_modify_qp ( ipoib->ibdev, ipoib->data.qp,
641 IB_MODIFY_QKEY, ipoib->data_qkey ) ) != 0 ){
642 DBGC ( ipoib, "IPoIB %p could not update data qkey: %s\n",
643 ipoib, strerror ( rc ) );
649 * Handle IPoIB metadata receive completion
651 * @v ibdev Infiniband device
653 * @v completion Completion
654 * @v iobuf I/O buffer
656 static void ipoib_meta_complete_recv ( struct ib_device *ibdev __unused,
657 struct ib_queue_pair *qp,
658 struct ib_completion *completion,
659 struct io_buffer *iobuf ) {
660 struct net_device *netdev = ib_qp_get_ownerdata ( qp );
661 struct ipoib_device *ipoib = netdev->priv;
664 if ( completion->syndrome ) {
665 DBGC ( ipoib, "IPoIB %p metadata RX completion error %x\n",
666 ipoib, completion->syndrome );
670 iob_put ( iobuf, completion->len );
671 if ( iob_len ( iobuf ) < sizeof ( struct ib_global_route_header ) ) {
672 DBGC ( ipoib, "IPoIB %p received metadata packet too short "
673 "to contain GRH\n", ipoib );
674 DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
677 iob_pull ( iobuf, sizeof ( struct ib_global_route_header ) );
678 if ( iob_len ( iobuf ) < sizeof ( *mad ) ) {
679 DBGC ( ipoib, "IPoIB %p received metadata packet too short "
680 "to contain reply\n", ipoib );
681 DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
686 if ( mad->mad_hdr.status != 0 ) {
687 DBGC ( ipoib, "IPoIB %p metadata RX err status %04x\n",
688 ipoib, ntohs ( mad->mad_hdr.status ) );
692 switch ( mad->mad_hdr.tid[0] ) {
693 case IPOIB_TID_GET_PATH_REC:
694 ipoib_recv_path_record ( ipoib, &mad->path_record );
696 case IPOIB_TID_MC_MEMBER_REC:
697 ipoib_recv_mc_member_record ( ipoib, &mad->mc_member_record );
700 DBGC ( ipoib, "IPoIB %p unwanted response:\n",
702 DBGC_HD ( ipoib, mad, sizeof ( *mad ) );
707 ipoib->meta.recv_fill--;
712 * Refill IPoIB receive ring
714 * @v ipoib IPoIB device
716 static void ipoib_refill_recv ( struct ipoib_device *ipoib,
717 struct ipoib_queue_set *qset ) {
718 struct ib_device *ibdev = ipoib->ibdev;
719 struct io_buffer *iobuf;
722 while ( qset->recv_fill < qset->recv_max_fill ) {
723 iobuf = alloc_iob ( IPOIB_MTU );
726 if ( ( rc = ib_post_recv ( ibdev, qset->qp, iobuf ) ) != 0 ) {
735 * Poll IPoIB network device
737 * @v netdev Network device
739 static void ipoib_poll ( struct net_device *netdev ) {
740 struct ipoib_device *ipoib = netdev->priv;
741 struct ib_device *ibdev = ipoib->ibdev;
743 ib_poll_cq ( ibdev, ipoib->meta.cq, ipoib_meta_complete_send,
744 ipoib_meta_complete_recv );
745 ib_poll_cq ( ibdev, ipoib->data.cq, ipoib_data_complete_send,
746 ipoib_data_complete_recv );
747 ipoib_refill_recv ( ipoib, &ipoib->meta );
748 ipoib_refill_recv ( ipoib, &ipoib->data );
752 * Enable/disable interrupts on IPoIB network device
754 * @v netdev Network device
755 * @v enable Interrupts should be enabled
757 static void ipoib_irq ( struct net_device *netdev __unused,
758 int enable __unused ) {
759 /* No implementation */
763 * Join IPv4 broadcast multicast group
765 * @v ipoib IPoIB device
766 * @ret rc Return status code
768 static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
772 if ( ! ipoib->data.qp )
775 /* Attach data queue to broadcast multicast GID */
776 assert ( ipoib->broadcast_attached == 0 );
777 if ( ( rc = ib_mcast_attach ( ipoib->ibdev, ipoib->data.qp,
778 &ipoib->broadcast_gid ) ) != 0 ){
779 DBGC ( ipoib, "IPoIB %p could not attach to broadcast GID: "
780 "%s\n", ipoib, strerror ( rc ) );
783 ipoib->broadcast_attached = 1;
785 /* Initiate broadcast group join */
786 if ( ( rc = ipoib_mc_member_record ( ipoib, &ipoib->broadcast_gid,
788 DBGC ( ipoib, "IPoIB %p could not send broadcast join: %s\n",
789 ipoib, strerror ( rc ) );
797 * Leave IPv4 broadcast multicast group
799 * @v ipoib IPoIB device
801 static void ipoib_leave_broadcast_group ( struct ipoib_device *ipoib ) {
803 /* Detach data queue from broadcast multicast GID */
804 if ( ipoib->broadcast_attached ) {
805 assert ( ipoib->data.qp != NULL );
806 ib_mcast_detach ( ipoib->ibdev, ipoib->data.qp,
807 &ipoib->broadcast_gid );
808 ipoib->broadcast_attached = 0;
813 * Open IPoIB network device
815 * @v netdev Network device
816 * @ret rc Return status code
818 static int ipoib_open ( struct net_device *netdev ) {
819 struct ipoib_device *ipoib = netdev->priv;
820 struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
823 /* Allocate metadata queue set */
824 if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->meta,
826 IPOIB_META_NUM_SEND_WQES,
827 IPOIB_META_NUM_RECV_WQES,
828 IB_GLOBAL_QKEY ) ) != 0 ) {
829 DBGC ( ipoib, "IPoIB %p could not allocate metadata QP: %s\n",
830 ipoib, strerror ( rc ) );
831 goto err_create_meta_qset;
834 /* Allocate data queue set */
835 if ( ( rc = ipoib_create_qset ( ipoib, &ipoib->data,
837 IPOIB_DATA_NUM_SEND_WQES,
838 IPOIB_DATA_NUM_RECV_WQES,
839 IB_GLOBAL_QKEY ) ) != 0 ) {
840 DBGC ( ipoib, "IPoIB %p could not allocate data QP: %s\n",
841 ipoib, strerror ( rc ) );
842 goto err_create_data_qset;
845 /* Update MAC address with data QPN */
846 mac->qpn = htonl ( ipoib->data.qp->qpn );
848 /* Fill receive rings */
849 ipoib_refill_recv ( ipoib, &ipoib->meta );
850 ipoib_refill_recv ( ipoib, &ipoib->data );
852 /* Join broadcast group */
853 if ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) {
854 DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n",
855 ipoib, strerror ( rc ) );
856 goto err_join_broadcast;
862 ipoib_destroy_qset ( ipoib, &ipoib->data );
863 err_create_data_qset:
864 ipoib_destroy_qset ( ipoib, &ipoib->meta );
865 err_create_meta_qset:
870 * Close IPoIB network device
872 * @v netdev Network device
874 static void ipoib_close ( struct net_device *netdev ) {
875 struct ipoib_device *ipoib = netdev->priv;
876 struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
878 /* Leave broadcast group */
879 ipoib_leave_broadcast_group ( ipoib );
881 /* Remove data QPN from MAC address */
884 /* Tear down the queues */
885 ipoib_destroy_qset ( ipoib, &ipoib->data );
886 ipoib_destroy_qset ( ipoib, &ipoib->meta );
889 /** IPoIB network device operations */
890 static struct net_device_operations ipoib_operations = {
892 .close = ipoib_close,
893 .transmit = ipoib_transmit,
899 * Update IPoIB dynamic Infiniband parameters
901 * @v ipoib IPoIB device
903 * The Infiniband port GID and partition key will change at runtime,
904 * when the link is established (or lost). The MAC address is based
905 * on the port GID, and the broadcast GID is based on the partition
906 * key. This function recalculates these IPoIB device parameters.
908 static void ipoib_set_ib_params ( struct ipoib_device *ipoib ) {
909 struct ib_device *ibdev = ipoib->ibdev;
910 struct ipoib_mac *mac;
912 /* Calculate GID portion of MAC address based on port GID */
913 mac = ( ( struct ipoib_mac * ) ipoib->netdev->ll_addr );
914 memcpy ( &mac->gid, &ibdev->port_gid, sizeof ( mac->gid ) );
916 /* Calculate broadcast GID based on partition key */
917 memcpy ( &ipoib->broadcast_gid, &ipv4_broadcast_gid,
918 sizeof ( ipoib->broadcast_gid ) );
919 ipoib->broadcast_gid.u.words[2] = htons ( ibdev->pkey );
923 * Handle link status change
925 * @v ibdev Infiniband device
927 void ipoib_link_state_changed ( struct ib_device *ibdev ) {
928 struct net_device *netdev = ib_get_ownerdata ( ibdev );
929 struct ipoib_device *ipoib = netdev->priv;
932 /* Leave existing broadcast group */
933 ipoib_leave_broadcast_group ( ipoib );
935 /* Update MAC address and broadcast GID based on new port GID
938 ipoib_set_ib_params ( ipoib );
940 /* Join new broadcast group */
941 if ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) {
942 DBGC ( ipoib, "IPoIB %p could not rejoin broadcast group: "
943 "%s\n", ipoib, strerror ( rc ) );
951 * @v ibdev Infiniband device
952 * @ret rc Return status code
954 int ipoib_probe ( struct ib_device *ibdev ) {
955 struct net_device *netdev;
956 struct ipoib_device *ipoib;
959 /* Allocate network device */
960 netdev = alloc_ipoibdev ( sizeof ( *ipoib ) );
963 netdev_init ( netdev, &ipoib_operations );
964 ipoib = netdev->priv;
965 ib_set_ownerdata ( ibdev, netdev );
966 netdev->dev = ibdev->dev;
967 memset ( ipoib, 0, sizeof ( *ipoib ) );
968 ipoib->netdev = netdev;
969 ipoib->ibdev = ibdev;
971 /* Calculate as much of the broadcast GID and the MAC address
972 * as we can. We won't know either of these in full until we
975 ipoib_set_ib_params ( ipoib );
977 /* Register network device */
978 if ( ( rc = register_netdev ( netdev ) ) != 0 )
979 goto err_register_netdev;
984 netdev_nullify ( netdev );
985 netdev_put ( netdev );
/**
 * Remove IPoIB device
 *
 * @v ibdev	Infiniband device
 *
 * Unregisters the network device recorded as the Infiniband device's
 * owner data by ipoib_probe(), and drops the reference to it.
 */
void ipoib_remove ( struct ib_device *ibdev ) {
	struct net_device *netdev = ib_get_ownerdata ( ibdev );

	unregister_netdev ( netdev );
	netdev_nullify ( netdev );
	netdev_put ( netdev );
}