2 * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or any later version.
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 FILE_LICENCE ( GPL2_OR_LATER );
27 #include <gpxe/if_arp.h>
28 #include <gpxe/iobuf.h>
29 #include <gpxe/netdevice.h>
30 #include <gpxe/infiniband.h>
31 #include <gpxe/ib_qset.h>
32 #include <gpxe/ib_pathrec.h>
33 #include <gpxe/ipoib.h>
/*
 * Ring sizes for the two queue sets created when the device is
 * opened: one for IPoIB data packets, one for metadata traffic.
 */

/** Data queue set: number of send work queue entries */
#define IPOIB_DATA_NUM_SEND_WQES	2

/** Data queue set: number of receive work queue entries */
#define IPOIB_DATA_NUM_RECV_WQES	4

/** Data queue set: number of completion queue entries */
#define IPOIB_DATA_NUM_CQES		8

/** Metadata queue set: number of send work queue entries */
#define IPOIB_META_NUM_SEND_WQES	2

/** Metadata queue set: number of receive work queue entries */
#define IPOIB_META_NUM_RECV_WQES	2

/** Metadata queue set: number of completion queue entries */
#define IPOIB_META_NUM_CQES		8
58 /** An IPoIB device */
61 struct net_device *netdev;
62 /** Underlying Infiniband device */
63 struct ib_device *ibdev;
65 struct ib_queue_set data;
67 struct ib_queue_set meta;
69 struct ib_gid broadcast_gid;
71 unsigned int broadcast_lid;
73 unsigned long data_qkey;
74 /** Attached to multicast group
76 * This flag indicates whether or not we have attached our
77 * data queue pair to the broadcast multicast GID.
79 int broadcast_attached;
82 /** TID half used to identify multicast member record replies */
83 #define IPOIB_TID_MC_MEMBER_REC 0x22222222UL
85 /** IPoIB metadata TID */
86 static uint32_t ipoib_meta_tid = 0;
88 /** Broadcast IPoIB address */
89 static struct ipoib_mac ipoib_broadcast = {
90 .qpn = htonl ( IB_QPN_BROADCAST ),
91 .gid.u.bytes = { 0xff, 0x12, 0x40, 0x1b, 0x00, 0x00, 0x00, 0x00,
92 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff },
95 /****************************************************************************
99 ****************************************************************************
105 * This serves a similar role to the ARP cache for Ethernet. (ARP
106 * *is* used on IPoIB; we have two caches to maintain.)
112 struct ipoib_mac mac;
115 /** Number of IPoIB peer cache entries
117 * Must be a power of two.
119 #define IPOIB_NUM_CACHED_PEERS 4
121 /** IPoIB peer address cache */
122 static struct ipoib_peer ipoib_peer_cache[IPOIB_NUM_CACHED_PEERS];
124 /** Oldest IPoIB peer cache entry index */
125 static unsigned int ipoib_peer_cache_idx = 1;
128 * Look up cached peer by key
130 * @v key Peer cache key
131 * @ret peer Peer cache entry, or NULL
133 static struct ipoib_peer * ipoib_lookup_peer_by_key ( unsigned int key ) {
134 struct ipoib_peer *peer;
137 for ( i = 0 ; i < IPOIB_NUM_CACHED_PEERS ; i++ ) {
138 peer = &ipoib_peer_cache[i];
139 if ( peer->key == key )
144 DBG ( "IPoIB warning: peer cache lost track of key %x while "
145 "still in use\n", key );
151 * Look up cached peer by GID
154 * @ret peer Peer cache entry, or NULL
156 static struct ipoib_peer *
157 ipoib_lookup_peer_by_gid ( const struct ib_gid *gid ) {
158 struct ipoib_peer *peer;
161 for ( i = 0 ; i < IPOIB_NUM_CACHED_PEERS ; i++ ) {
162 peer = &ipoib_peer_cache[i];
163 if ( memcmp ( &peer->mac.gid, gid,
164 sizeof ( peer->mac.gid) ) == 0 ) {
173 * Store GID and QPN in peer cache
177 * @ret peer Peer cache entry
179 static struct ipoib_peer *
180 ipoib_cache_peer ( const struct ib_gid *gid, unsigned long qpn ) {
181 struct ipoib_peer *peer;
184 /* Look for existing cache entry */
185 peer = ipoib_lookup_peer_by_gid ( gid );
187 assert ( peer->mac.qpn = ntohl ( qpn ) );
191 /* No entry found: create a new one */
192 key = ipoib_peer_cache_idx++;
193 peer = &ipoib_peer_cache[ key % IPOIB_NUM_CACHED_PEERS ];
195 DBG ( "IPoIB peer %x evicted from cache\n", peer->key );
197 memset ( peer, 0, sizeof ( *peer ) );
199 peer->mac.qpn = htonl ( qpn );
200 memcpy ( &peer->mac.gid, gid, sizeof ( peer->mac.gid ) );
201 DBG ( "IPoIB peer %x has GID %08x:%08x:%08x:%08x and QPN %lx\n",
202 peer->key, htonl ( gid->u.dwords[0] ),
203 htonl ( gid->u.dwords[1] ), htonl ( gid->u.dwords[2] ),
204 htonl ( gid->u.dwords[3] ), qpn );
208 /****************************************************************************
212 ****************************************************************************
216 * Add IPoIB link-layer header
218 * @v netdev Network device
219 * @v iobuf I/O buffer
220 * @v ll_dest Link-layer destination address
221 * @v ll_source Source link-layer address
222 * @v net_proto Network-layer protocol, in network-byte order
223 * @ret rc Return status code
225 static int ipoib_push ( struct net_device *netdev __unused,
226 struct io_buffer *iobuf, const void *ll_dest,
227 const void *ll_source __unused, uint16_t net_proto ) {
228 struct ipoib_hdr *ipoib_hdr =
229 iob_push ( iobuf, sizeof ( *ipoib_hdr ) );
230 const struct ipoib_mac *dest_mac = ll_dest;
231 const struct ipoib_mac *src_mac = ll_source;
232 struct ipoib_peer *dest;
233 struct ipoib_peer *src;
235 /* Add link-layer addresses to cache */
236 dest = ipoib_cache_peer ( &dest_mac->gid, ntohl ( dest_mac->qpn ) );
237 src = ipoib_cache_peer ( &src_mac->gid, ntohl ( src_mac->qpn ) );
239 /* Build IPoIB header */
240 ipoib_hdr->proto = net_proto;
241 ipoib_hdr->u.peer.dest = dest->key;
242 ipoib_hdr->u.peer.src = src->key;
248 * Remove IPoIB link-layer header
250 * @v netdev Network device
251 * @v iobuf I/O buffer
252 * @ret ll_dest Link-layer destination address
253 * @ret ll_source Source link-layer address
254 * @ret net_proto Network-layer protocol, in network-byte order
255 * @ret rc Return status code
257 static int ipoib_pull ( struct net_device *netdev __unused,
258 struct io_buffer *iobuf, const void **ll_dest,
259 const void **ll_source, uint16_t *net_proto ) {
260 struct ipoib_hdr *ipoib_hdr = iobuf->data;
261 struct ipoib_peer *dest;
262 struct ipoib_peer *source;
265 if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) {
266 DBG ( "IPoIB packet too short for link-layer header\n" );
267 DBG_HD ( iobuf->data, iob_len ( iobuf ) );
271 /* Strip off IPoIB header */
272 iob_pull ( iobuf, sizeof ( *ipoib_hdr ) );
274 /* Identify source and destination addresses, and clear
275 * reserved word in IPoIB header
277 dest = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.dest );
278 source = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.src );
279 ipoib_hdr->u.reserved = 0;
281 /* Fill in required fields */
282 *ll_dest = ( dest ? &dest->mac : &ipoib_broadcast );
283 *ll_source = ( source ? &source->mac : &ipoib_broadcast );
284 *net_proto = ipoib_hdr->proto;
290 * Transcribe IPoIB address
292 * @v ll_addr Link-layer address
293 * @ret string Link-layer address in human-readable format
295 const char * ipoib_ntoa ( const void *ll_addr ) {
297 const struct ipoib_mac *mac = ll_addr;
299 snprintf ( buf, sizeof ( buf ), "%08x:%08x:%08x:%08x:%08x",
300 htonl ( mac->qpn ), htonl ( mac->gid.u.dwords[0] ),
301 htonl ( mac->gid.u.dwords[1] ),
302 htonl ( mac->gid.u.dwords[2] ),
303 htonl ( mac->gid.u.dwords[3] ) );
308 * Hash multicast address
310 * @v af Address family
311 * @v net_addr Network-layer address
312 * @v ll_addr Link-layer address to fill in
313 * @ret rc Return status code
315 static int ipoib_mc_hash ( unsigned int af __unused,
316 const void *net_addr __unused,
317 void *ll_addr __unused ) {
322 /** IPoIB protocol */
323 struct ll_protocol ipoib_protocol __ll_protocol = {
325 .ll_proto = htons ( ARPHRD_INFINIBAND ),
326 .ll_addr_len = IPOIB_ALEN,
327 .ll_header_len = IPOIB_HLEN,
331 .mc_hash = ipoib_mc_hash,
334 /****************************************************************************
336 * IPoIB network device
338 ****************************************************************************
342 * Transmit multicast group membership request
344 * @v ipoib IPoIB device
345 * @v gid Multicast GID
346 * @v join Join (rather than leave) group
347 * @ret rc Return status code
349 static int ipoib_mc_member_record ( struct ipoib_device *ipoib,
350 struct ib_gid *gid, int join ) {
351 struct ib_device *ibdev = ipoib->ibdev;
352 struct io_buffer *iobuf;
353 struct ib_mad_sa *sa;
354 struct ib_address_vector av;
357 /* Allocate I/O buffer */
358 iobuf = alloc_iob ( sizeof ( *sa ) );
361 iob_put ( iobuf, sizeof ( *sa ) );
363 memset ( sa, 0, sizeof ( *sa ) );
365 /* Construct path record request */
366 sa->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
367 sa->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
368 sa->mad_hdr.class_version = 2;
370 ( join ? IB_MGMT_METHOD_SET : IB_MGMT_METHOD_DELETE );
371 sa->mad_hdr.attr_id = htons ( IB_SA_ATTR_MC_MEMBER_REC );
372 sa->mad_hdr.tid[0] = IPOIB_TID_MC_MEMBER_REC;
373 sa->mad_hdr.tid[1] = ipoib_meta_tid++;
374 sa->sa_hdr.comp_mask[1] =
375 htonl ( IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
376 IB_SA_MCMEMBER_REC_JOIN_STATE );
377 sa->sa_data.mc_member_record.scope__join_state = 1;
378 memcpy ( &sa->sa_data.mc_member_record.mgid, gid,
379 sizeof ( sa->sa_data.mc_member_record.mgid ) );
380 memcpy ( &sa->sa_data.mc_member_record.port_gid, &ibdev->gid,
381 sizeof ( sa->sa_data.mc_member_record.port_gid ) );
383 /* Construct address vector */
384 memset ( &av, 0, sizeof ( av ) );
385 av.lid = ibdev->sm_lid;
386 av.sl = ibdev->sm_sl;
388 av.qkey = IB_QKEY_GMA;
390 /* Post send request */
391 if ( ( rc = ib_post_send ( ibdev, ipoib->meta.qp, &av,
393 DBGC ( ipoib, "IPoIB %p could not send get path record: %s\n",
394 ipoib, strerror ( rc ) );
403 * Transmit packet via IPoIB network device
405 * @v netdev Network device
406 * @v iobuf I/O buffer
407 * @ret rc Return status code
409 static int ipoib_transmit ( struct net_device *netdev,
410 struct io_buffer *iobuf ) {
411 struct ipoib_device *ipoib = netdev->priv;
412 struct ib_device *ibdev = ipoib->ibdev;
413 struct ipoib_hdr *ipoib_hdr;
414 struct ipoib_peer *dest;
415 struct ib_address_vector av;
419 if ( iob_len ( iobuf ) < sizeof ( *ipoib_hdr ) ) {
420 DBGC ( ipoib, "IPoIB %p buffer too short\n", ipoib );
423 ipoib_hdr = iobuf->data;
425 /* Attempting transmission while link is down will put the
426 * queue pair into an error state, so don't try it.
428 if ( ! ib_link_ok ( ibdev ) )
431 /* Identify destination address */
432 dest = ipoib_lookup_peer_by_key ( ipoib_hdr->u.peer.dest );
435 ipoib_hdr->u.reserved = 0;
437 /* Construct address vector */
438 memset ( &av, 0, sizeof ( av ) );
439 av.qpn = ntohl ( dest->mac.qpn );
440 av.qkey = ipoib->data_qkey;
442 if ( av.qpn == IB_QPN_BROADCAST ) {
444 av.lid = ipoib->broadcast_lid;
445 memcpy ( &av.gid, &ipoib->broadcast_gid, sizeof ( av.gid ) );
448 memcpy ( &av.gid, &dest->mac.gid, sizeof ( av.gid ) );
449 if ( ( rc = ib_resolve_path ( ibdev, &av ) ) != 0 ) {
450 /* Path not resolved yet */
455 return ib_post_send ( ibdev, ipoib->data.qp, &av, iobuf );
459 * Handle IPoIB data send completion
461 * @v ibdev Infiniband device
463 * @v iobuf I/O buffer
464 * @v rc Completion status code
466 static void ipoib_data_complete_send ( struct ib_device *ibdev __unused,
467 struct ib_queue_pair *qp,
468 struct io_buffer *iobuf, int rc ) {
469 struct ipoib_device *ipoib = ib_qp_get_ownerdata ( qp );
471 netdev_tx_complete_err ( ipoib->netdev, iobuf, rc );
475 * Handle IPoIB data receive completion
477 * @v ibdev Infiniband device
479 * @v av Address vector, or NULL
480 * @v iobuf I/O buffer
481 * @v rc Completion status code
483 static void ipoib_data_complete_recv ( struct ib_device *ibdev __unused,
484 struct ib_queue_pair *qp,
485 struct ib_address_vector *av,
486 struct io_buffer *iobuf, int rc ) {
487 struct ipoib_device *ipoib = ib_qp_get_ownerdata ( qp );
488 struct net_device *netdev = ipoib->netdev;
489 struct ipoib_hdr *ipoib_hdr;
490 struct ipoib_peer *src;
493 netdev_rx_err ( netdev, iobuf, rc );
498 if ( iob_len ( iobuf ) < sizeof ( struct ipoib_hdr ) ) {
499 DBGC ( ipoib, "IPoIB %p received data packet too short to "
500 "contain IPoIB header\n", ipoib );
501 DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
502 netdev_rx_err ( netdev, iobuf, -EIO );
505 ipoib_hdr = iobuf->data;
507 /* Parse source address */
508 if ( av->gid_present ) {
509 src = ipoib_cache_peer ( &av->gid, av->qpn );
510 ipoib_hdr->u.peer.src = src->key;
513 /* Hand off to network layer */
514 netdev_rx ( netdev, iobuf );
517 /** IPoIB data completion operations */
518 static struct ib_completion_queue_operations ipoib_data_cq_op = {
519 .complete_send = ipoib_data_complete_send,
520 .complete_recv = ipoib_data_complete_recv,
524 * Handle IPoIB metadata send completion
526 * @v ibdev Infiniband device
528 * @v iobuf I/O buffer
529 * @v rc Completion status code
531 static void ipoib_meta_complete_send ( struct ib_device *ibdev __unused,
532 struct ib_queue_pair *qp,
533 struct io_buffer *iobuf, int rc ) {
534 struct ipoib_device *ipoib = ib_qp_get_ownerdata ( qp );
537 DBGC ( ipoib, "IPoIB %p metadata TX completion error: %s\n",
538 ipoib, strerror ( rc ) );
544 * Handle received IPoIB multicast membership record
546 * @v ipoib IPoIB device
547 * @v mc_member_record Multicast membership record
549 static void ipoib_recv_mc_member_record ( struct ipoib_device *ipoib,
550 struct ib_mc_member_record *mc_member_record ) {
554 /* Record parameters */
555 joined = ( mc_member_record->scope__join_state & 0x0f );
556 ipoib->data_qkey = ntohl ( mc_member_record->qkey );
557 ipoib->broadcast_lid = ntohs ( mc_member_record->mlid );
558 DBGC ( ipoib, "IPoIB %p %s broadcast group: qkey %lx mlid %x\n",
559 ipoib, ( joined ? "joined" : "left" ), ipoib->data_qkey,
560 ipoib->broadcast_lid );
562 /* Update data queue pair qkey */
563 if ( ( rc = ib_modify_qp ( ipoib->ibdev, ipoib->data.qp,
564 IB_MODIFY_QKEY, ipoib->data_qkey ) ) != 0 ){
565 DBGC ( ipoib, "IPoIB %p could not update data qkey: %s\n",
566 ipoib, strerror ( rc ) );
572 * Handle IPoIB metadata receive completion
574 * @v ibdev Infiniband device
576 * @v av Address vector, or NULL
577 * @v iobuf I/O buffer
578 * @v rc Completion status code
581 ipoib_meta_complete_recv ( struct ib_device *ibdev __unused,
582 struct ib_queue_pair *qp,
583 struct ib_address_vector *av __unused,
584 struct io_buffer *iobuf, int rc ) {
585 struct ipoib_device *ipoib = ib_qp_get_ownerdata ( qp );
586 struct ib_mad_sa *sa;
589 DBGC ( ipoib, "IPoIB %p metadata RX completion error: %s\n",
590 ipoib, strerror ( rc ) );
594 if ( iob_len ( iobuf ) < sizeof ( *sa ) ) {
595 DBGC ( ipoib, "IPoIB %p received metadata packet too short "
596 "to contain reply\n", ipoib );
597 DBGC_HD ( ipoib, iobuf->data, iob_len ( iobuf ) );
602 if ( sa->mad_hdr.status != 0 ) {
603 DBGC ( ipoib, "IPoIB %p metadata RX err status %04x\n",
604 ipoib, ntohs ( sa->mad_hdr.status ) );
608 switch ( sa->mad_hdr.tid[0] ) {
609 case IPOIB_TID_MC_MEMBER_REC:
610 ipoib_recv_mc_member_record ( ipoib,
611 &sa->sa_data.mc_member_record );
614 DBGC ( ipoib, "IPoIB %p unwanted response:\n",
616 DBGC_HD ( ipoib, sa, sizeof ( *sa ) );
624 /** IPoIB metadata completion operations */
625 static struct ib_completion_queue_operations ipoib_meta_cq_op = {
626 .complete_send = ipoib_meta_complete_send,
627 .complete_recv = ipoib_meta_complete_recv,
631 * Poll IPoIB network device
633 * @v netdev Network device
635 static void ipoib_poll ( struct net_device *netdev ) {
636 struct ipoib_device *ipoib = netdev->priv;
637 struct ib_device *ibdev = ipoib->ibdev;
639 ib_poll_eq ( ibdev );
643 * Enable/disable interrupts on IPoIB network device
645 * @v netdev Network device
646 * @v enable Interrupts should be enabled
648 static void ipoib_irq ( struct net_device *netdev __unused,
649 int enable __unused ) {
650 /* No implementation */
654 * Join IPv4 broadcast multicast group
656 * @v ipoib IPoIB device
657 * @ret rc Return status code
659 static int ipoib_join_broadcast_group ( struct ipoib_device *ipoib ) {
663 if ( ! ipoib->data.qp )
666 /* Attach data queue to broadcast multicast GID */
667 assert ( ipoib->broadcast_attached == 0 );
668 if ( ( rc = ib_mcast_attach ( ipoib->ibdev, ipoib->data.qp,
669 &ipoib->broadcast_gid ) ) != 0 ){
670 DBGC ( ipoib, "IPoIB %p could not attach to broadcast GID: "
671 "%s\n", ipoib, strerror ( rc ) );
674 ipoib->broadcast_attached = 1;
676 /* Initiate broadcast group join */
677 if ( ( rc = ipoib_mc_member_record ( ipoib, &ipoib->broadcast_gid,
679 DBGC ( ipoib, "IPoIB %p could not send broadcast join: %s\n",
680 ipoib, strerror ( rc ) );
684 /* We will set link up on the network device when we receive
685 * the broadcast join response.
692 * Leave IPv4 broadcast multicast group
694 * @v ipoib IPoIB device
696 static void ipoib_leave_broadcast_group ( struct ipoib_device *ipoib ) {
698 /* Detach data queue from broadcast multicast GID */
699 if ( ipoib->broadcast_attached ) {
700 assert ( ipoib->data.qp != NULL );
701 ib_mcast_detach ( ipoib->ibdev, ipoib->data.qp,
702 &ipoib->broadcast_gid );
703 ipoib->broadcast_attached = 0;
708 * Open IPoIB network device
710 * @v netdev Network device
711 * @ret rc Return status code
713 static int ipoib_open ( struct net_device *netdev ) {
714 struct ipoib_device *ipoib = netdev->priv;
715 struct ib_device *ibdev = ipoib->ibdev;
716 struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
720 if ( ( rc = ib_open ( ibdev ) ) != 0 ) {
721 DBGC ( ipoib, "IPoIB %p could not open device: %s\n",
722 ipoib, strerror ( rc ) );
726 /* Allocate metadata queue set */
727 if ( ( rc = ib_create_qset ( ibdev, &ipoib->meta,
728 IPOIB_META_NUM_CQES, &ipoib_meta_cq_op,
729 IPOIB_META_NUM_SEND_WQES,
730 IPOIB_META_NUM_RECV_WQES,
731 IB_QKEY_GMA ) ) != 0 ) {
732 DBGC ( ipoib, "IPoIB %p could not allocate metadata QP: %s\n",
733 ipoib, strerror ( rc ) );
734 goto err_create_meta_qset;
736 ib_qp_set_ownerdata ( ipoib->meta.qp, ipoib );
738 /* Allocate data queue set */
739 if ( ( rc = ib_create_qset ( ibdev, &ipoib->data,
740 IPOIB_DATA_NUM_CQES, &ipoib_data_cq_op,
741 IPOIB_DATA_NUM_SEND_WQES,
742 IPOIB_DATA_NUM_RECV_WQES,
743 IB_QKEY_GMA ) ) != 0 ) {
744 DBGC ( ipoib, "IPoIB %p could not allocate data QP: %s\n",
745 ipoib, strerror ( rc ) );
746 goto err_create_data_qset;
748 ib_qp_set_ownerdata ( ipoib->data.qp, ipoib );
750 /* Update MAC address with data QPN */
751 mac->qpn = htonl ( ipoib->data.qp->qpn );
753 /* Fill receive rings */
754 ib_refill_recv ( ibdev, ipoib->meta.qp );
755 ib_refill_recv ( ibdev, ipoib->data.qp );
757 /* Join broadcast group */
758 if ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) {
759 DBGC ( ipoib, "IPoIB %p could not join broadcast group: %s\n",
760 ipoib, strerror ( rc ) );
761 goto err_join_broadcast;
767 ib_destroy_qset ( ibdev, &ipoib->data );
768 err_create_data_qset:
769 ib_destroy_qset ( ibdev, &ipoib->meta );
770 err_create_meta_qset:
777 * Close IPoIB network device
779 * @v netdev Network device
781 static void ipoib_close ( struct net_device *netdev ) {
782 struct ipoib_device *ipoib = netdev->priv;
783 struct ipoib_mac *mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
785 /* Leave broadcast group */
786 ipoib_leave_broadcast_group ( ipoib );
788 /* Remove data QPN from MAC address */
791 /* Tear down the queues */
792 ib_destroy_qset ( ipoib->ibdev, &ipoib->data );
793 ib_destroy_qset ( ipoib->ibdev, &ipoib->meta );
795 /* Close IB device */
796 ib_close ( ipoib->ibdev );
799 /** IPoIB network device operations */
800 static struct net_device_operations ipoib_operations = {
802 .close = ipoib_close,
803 .transmit = ipoib_transmit,
809 * Update IPoIB dynamic Infiniband parameters
811 * @v ipoib IPoIB device
813 * The Infiniband port GID and partition key will change at runtime,
814 * when the link is established (or lost). The MAC address is based
815 * on the port GID, and the broadcast GID is based on the partition
816 * key. This function recalculates these IPoIB device parameters.
818 static void ipoib_set_ib_params ( struct ipoib_device *ipoib ) {
819 struct ib_device *ibdev = ipoib->ibdev;
820 struct net_device *netdev = ipoib->netdev;
821 struct ipoib_mac *mac;
823 /* Calculate GID portion of MAC address based on port GID */
824 mac = ( ( struct ipoib_mac * ) netdev->ll_addr );
825 memcpy ( &mac->gid, &ibdev->gid, sizeof ( mac->gid ) );
827 /* Calculate broadcast GID based on partition key */
828 memcpy ( &ipoib->broadcast_gid, &ipoib_broadcast.gid,
829 sizeof ( ipoib->broadcast_gid ) );
830 ipoib->broadcast_gid.u.words[2] = htons ( ibdev->pkey );
832 /* Set net device link state to reflect Infiniband link state */
833 if ( ib_link_ok ( ibdev ) ) {
834 netdev_link_up ( netdev );
836 netdev_link_down ( netdev );
841 * Handle link status change
843 * @v ibdev Infiniband device
845 void ipoib_link_state_changed ( struct ib_device *ibdev ) {
846 struct net_device *netdev = ib_get_ownerdata ( ibdev );
847 struct ipoib_device *ipoib = netdev->priv;
850 /* Leave existing broadcast group */
851 ipoib_leave_broadcast_group ( ipoib );
853 /* Update MAC address and broadcast GID based on new port GID
856 ipoib_set_ib_params ( ipoib );
858 /* Join new broadcast group */
859 if ( ( rc = ipoib_join_broadcast_group ( ipoib ) ) != 0 ) {
860 DBGC ( ipoib, "IPoIB %p could not rejoin broadcast group: "
861 "%s\n", ipoib, strerror ( rc ) );
869 * @v ibdev Infiniband device
870 * @ret rc Return status code
872 int ipoib_probe ( struct ib_device *ibdev ) {
873 struct net_device *netdev;
874 struct ipoib_device *ipoib;
877 /* Allocate network device */
878 netdev = alloc_ipoibdev ( sizeof ( *ipoib ) );
881 netdev_init ( netdev, &ipoib_operations );
882 ipoib = netdev->priv;
883 ib_set_ownerdata ( ibdev, netdev );
884 netdev->dev = ibdev->dev;
885 memset ( ipoib, 0, sizeof ( *ipoib ) );
886 ipoib->netdev = netdev;
887 ipoib->ibdev = ibdev;
889 /* Calculate as much of the broadcast GID and the MAC address
890 * as we can. We won't know either of these in full until we
893 ipoib_set_ib_params ( ipoib );
895 /* Register network device */
896 if ( ( rc = register_netdev ( netdev ) ) != 0 )
897 goto err_register_netdev;
902 netdev_nullify ( netdev );
903 netdev_put ( netdev );
/**
 * Remove IPoIB device
 *
 * @v ibdev		Infiniband device
 *
 * Unregisters the network device stored as the Infiniband device's
 * owner data, then nullifies it and drops a reference to it.
 */
void ipoib_remove ( struct ib_device *ibdev ) {
	struct net_device *owner = ib_get_ownerdata ( ibdev );

	unregister_netdev ( owner );
	netdev_nullify ( owner );
	netdev_put ( owner );
}
921 * Allocate IPoIB device
923 * @v priv_size Size of driver private data
924 * @ret netdev Network device, or NULL
926 struct net_device * alloc_ipoibdev ( size_t priv_size ) {
927 struct net_device *netdev;
929 netdev = alloc_netdev ( priv_size );
931 netdev->ll_protocol = &ipoib_protocol;
932 netdev->ll_broadcast = ( uint8_t * ) &ipoib_broadcast;
933 netdev->max_pkt_len = IB_MAX_PAYLOAD_SIZE;