#include <errno.h>
#include <gpxe/if_ether.h>
#include <gpxe/pkbuff.h>
#include <gpxe/netdevice.h>
#include <gpxe/tcpip.h>
/** @file
 *
 * IPv4 protocol
 *
 * The gPXE IP stack is currently implemented on top of the uIP
 * protocol stack.  This file provides wrappers around uIP so that
 * higher-level protocol implementations do not need to talk directly
 * to uIP (which has a somewhat baroque API).
 */
28 /* Unique IP datagram identification number */
29 static uint16_t next_ident = 0;
31 struct net_protocol ipv4_protocol;
33 /** An IPv4 address/routing table entry */
34 struct ipv4_miniroute {
35 /** List of miniroutes */
36 struct list_head list;
39 struct net_device *netdev;
40 /** Reference to network device */
41 struct reference netdev_ref;
44 struct in_addr address;
46 struct in_addr netmask;
47 /** Gateway address */
48 struct in_addr gateway;
/** Table of IPv4 miniroutes */
static LIST_HEAD ( miniroutes );

/** Fragment reassembly buffers currently in progress */
static LIST_HEAD ( frag_buffers );

/* Defined below; installed as a callback by add_ipv4_miniroute() */
static void ipv4_forget_netdev ( struct reference *ref );
60 * Add IPv4 minirouting table entry
62 * @v netdev Network device
63 * @v address IPv4 address
64 * @v netmask Subnet mask
65 * @v gateway Gateway address (or @c INADDR_NONE for no gateway)
66 * @ret miniroute Routing table entry, or NULL
68 static struct ipv4_miniroute * add_ipv4_miniroute ( struct net_device *netdev,
69 struct in_addr address,
70 struct in_addr netmask,
71 struct in_addr gateway ) {
72 struct ipv4_miniroute *miniroute;
74 /* Allocate and populate miniroute structure */
75 miniroute = malloc ( sizeof ( *miniroute ) );
78 DBG ( "IPv4 add %s", inet_ntoa ( address ) );
79 DBG ( "/%s ", inet_ntoa ( netmask ) );
80 if ( gateway.s_addr != INADDR_NONE )
81 DBG ( "gw %s ", inet_ntoa ( gateway ) );
82 DBG ( "via %s\n", netdev_name ( netdev ) );
84 /* Record routing information */
85 miniroute->netdev = netdev;
86 miniroute->address = address;
87 miniroute->netmask = netmask;
88 miniroute->gateway = gateway;
90 /* Add to end of list if we have a gateway, otherwise
93 if ( gateway.s_addr != INADDR_NONE ) {
94 list_add_tail ( &miniroute->list, &miniroutes );
96 list_add ( &miniroute->list, &miniroutes );
99 /* Record reference to net_device */
100 miniroute->netdev_ref.forget = ipv4_forget_netdev;
101 ref_add ( &miniroute->netdev_ref, &netdev->references );
108 * Delete IPv4 minirouting table entry
110 * @v miniroute Routing table entry
112 static void del_ipv4_miniroute ( struct ipv4_miniroute *miniroute ) {
114 DBG ( "IPv4 del %s", inet_ntoa ( miniroute->address ) );
115 DBG ( "/%s ", inet_ntoa ( miniroute->netmask ) );
116 if ( miniroute->gateway.s_addr != INADDR_NONE )
117 DBG ( "gw %s ", inet_ntoa ( miniroute->gateway ) );
118 DBG ( "via %s\n", netdev_name ( miniroute->netdev ) );
120 ref_del ( &miniroute->netdev_ref );
121 list_del ( &miniroute->list );
126 * Forget reference to net_device
128 * @v ref Persistent reference
130 static void ipv4_forget_netdev ( struct reference *ref ) {
131 struct ipv4_miniroute *miniroute
132 = container_of ( ref, struct ipv4_miniroute, netdev_ref );
134 del_ipv4_miniroute ( miniroute );
140 * @v netdev Network device
141 * @v address IPv4 address
142 * @v netmask Subnet mask
143 * @v gateway Gateway address (or @c INADDR_NONE for no gateway)
144 * @ret rc Return status code
147 int add_ipv4_address ( struct net_device *netdev, struct in_addr address,
148 struct in_addr netmask, struct in_addr gateway ) {
149 struct ipv4_miniroute *miniroute;
151 /* Clear any existing address for this net device */
152 del_ipv4_address ( netdev );
154 /* Add new miniroute */
155 miniroute = add_ipv4_miniroute ( netdev, address, netmask, gateway );
163 * Remove IPv4 interface
165 * @v netdev Network device
167 void del_ipv4_address ( struct net_device *netdev ) {
168 struct ipv4_miniroute *miniroute;
170 list_for_each_entry ( miniroute, &miniroutes, list ) {
171 if ( miniroute->netdev == netdev ) {
172 del_ipv4_miniroute ( miniroute );
179 * Perform IPv4 routing
181 * @v dest Final destination address
182 * @ret dest Next hop destination address
183 * @ret miniroute Routing table entry to use, or NULL if no route
185 static struct ipv4_miniroute * ipv4_route ( struct in_addr *dest ) {
186 struct ipv4_miniroute *miniroute;
190 list_for_each_entry ( miniroute, &miniroutes, list ) {
191 local = ( ( ( dest->s_addr ^ miniroute->address.s_addr )
192 & miniroute->netmask.s_addr ) == 0 );
193 has_gw = ( miniroute->gateway.s_addr != INADDR_NONE );
194 if ( local || has_gw ) {
196 *dest = miniroute->gateway;
205 * Fragment reassembly counter timeout
207 * @v timer Retry timer
208 * @v over If asserted, the timer is greater than @c MAX_TIMEOUT
210 static void ipv4_frag_expired ( struct retry_timer *timer __unused,
213 DBG ( "Fragment reassembly timeout" );
214 /* Free the fragment buffer */
/**
 * Free fragment buffer
 *
 * @v fragbuf		Fragment buffer
 *
 * Releases only the @c frag_buffer structure itself.  The caller is
 * responsible for unlinking it from the fragment buffer list and for
 * disposing of the reassembly packet buffer it points to.
 */
static void free_fragbuf ( struct frag_buffer *fragbuf ) {
	/* Fragment buffers are obtained from malloc() in
	 * ipv4_reassemble(), so they must be returned with free()
	 * rather than with the sized DMA deallocator.
	 */
	free ( fragbuf );
}
230 * Fragment reassembler
232 * @v pkb Packet buffer, fragment of the datagram
233 * @ret frag_pkb Reassembled packet, or NULL
235 static struct pk_buff * ipv4_reassemble ( struct pk_buff * pkb ) {
236 struct iphdr *iphdr = pkb->data;
237 struct frag_buffer *fragbuf;
240 * Check if the fragment belongs to any fragment series
242 list_for_each_entry ( fragbuf, &frag_buffers, list ) {
243 if ( fragbuf->ident == iphdr->ident &&
244 fragbuf->src.s_addr == iphdr->src.s_addr ) {
246 * Check if the packet is the expected fragment
248 * The offset of the new packet must be equal to the
249 * length of the data accumulated so far (the length of
250 * the reassembled packet buffer
252 if ( pkb_len ( fragbuf->frag_pkb ) ==
253 ( iphdr->frags & IP_MASK_OFFSET ) ) {
255 * Append the contents of the fragment to the
256 * reassembled packet buffer
258 pkb_pull ( pkb, sizeof ( *iphdr ) );
259 memcpy ( pkb_put ( fragbuf->frag_pkb,
261 pkb->data, pkb_len ( pkb ) );
264 /** Check if the fragment series is over */
265 if ( !iphdr->frags & IP_MASK_MOREFRAGS ) {
266 pkb = fragbuf->frag_pkb;
267 free_fragbuf ( fragbuf );
272 /* Discard the fragment series */
273 free_fragbuf ( fragbuf );
280 /** Check if the fragment is the first in the fragment series */
281 if ( iphdr->frags & IP_MASK_MOREFRAGS &&
282 ( ( iphdr->frags & IP_MASK_OFFSET ) == 0 ) ) {
284 /** Create a new fragment buffer */
285 fragbuf = ( struct frag_buffer* ) malloc ( sizeof( *fragbuf ) );
286 fragbuf->ident = iphdr->ident;
287 fragbuf->src = iphdr->src;
289 /* Set up the reassembly packet buffer */
290 fragbuf->frag_pkb = alloc_pkb ( IP_FRAG_PKB_SIZE );
291 pkb_pull ( pkb, sizeof ( *iphdr ) );
292 memcpy ( pkb_put ( fragbuf->frag_pkb, pkb_len ( pkb ) ),
293 pkb->data, pkb_len ( pkb ) );
296 /* Set the reassembly timer */
297 fragbuf->frag_timer.timeout = IP_FRAG_TIMEOUT;
298 fragbuf->frag_timer.expired = ipv4_frag_expired;
299 start_timer ( &fragbuf->frag_timer );
301 /* Add the fragment buffer to the list of fragment buffers */
302 list_add ( &fragbuf->list, &frag_buffers );
309 * Add IPv4 pseudo-header checksum to existing checksum
311 * @v pkb Packet buffer
312 * @v csum Existing checksum
313 * @ret csum Updated checksum
315 static uint16_t ipv4_pshdr_chksum ( struct pk_buff *pkb, uint16_t csum ) {
316 struct ipv4_pseudo_header pshdr;
317 struct iphdr *iphdr = pkb->data;
318 size_t hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
320 /* Build pseudo-header */
321 pshdr.src = iphdr->src;
322 pshdr.dest = iphdr->dest;
323 pshdr.zero_padding = 0x00;
324 pshdr.protocol = iphdr->protocol;
325 pshdr.len = htons ( pkb_len ( pkb ) - hdrlen );
327 /* Update the checksum value */
328 return tcpip_continue_chksum ( csum, &pshdr, sizeof ( pshdr ) );
332 * Determine link-layer address
334 * @v dest IPv4 destination address
335 * @v src IPv4 source address
336 * @v netdev Network device
337 * @v ll_dest Link-layer destination address buffer
338 * @ret rc Return status code
340 static int ipv4_ll_addr ( struct in_addr dest, struct in_addr src,
341 struct net_device *netdev, uint8_t *ll_dest ) {
342 struct ll_protocol *ll_protocol = netdev->ll_protocol;
343 uint8_t *dest_bytes = ( ( uint8_t * ) &dest );
345 if ( dest.s_addr == INADDR_BROADCAST ) {
346 /* Broadcast address */
347 memcpy ( ll_dest, ll_protocol->ll_broadcast,
348 ll_protocol->ll_addr_len );
350 } else if ( IN_MULTICAST ( dest.s_addr ) ) {
351 /* Special case: IPv4 multicast over Ethernet. This
352 * code may need to be generalised once we find out
353 * what happens for other link layers.
358 ll_dest[3] = dest_bytes[1] & 0x7f;
359 ll_dest[4] = dest_bytes[2];
360 ll_dest[5] = dest_bytes[3];
363 /* Unicast address: resolve via ARP */
364 return arp_resolve ( netdev, &ipv4_protocol, &dest,
372 * @v pkb Packet buffer
373 * @v tcpip Transport-layer protocol
374 * @v st_dest Destination network-layer address
375 * @v trans_csum Transport-layer checksum to complete, or NULL
378 * This function expects a transport-layer segment and prepends the IP header
380 static int ipv4_tx ( struct pk_buff *pkb,
381 struct tcpip_protocol *tcpip_protocol,
382 struct sockaddr_tcpip *st_dest, uint16_t *trans_csum ) {
383 struct iphdr *iphdr = pkb_push ( pkb, sizeof ( *iphdr ) );
384 struct sockaddr_in *sin_dest = ( ( struct sockaddr_in * ) st_dest );
385 struct ipv4_miniroute *miniroute;
386 struct in_addr next_hop;
387 uint8_t ll_dest[MAX_LL_ADDR_LEN];
390 /* Fill up the IP header, except source address */
391 iphdr->verhdrlen = ( IP_VER | ( sizeof ( *iphdr ) / 4 ) );
392 iphdr->service = IP_TOS;
393 iphdr->len = htons ( pkb_len ( pkb ) );
394 iphdr->ident = htons ( ++next_ident );
397 iphdr->protocol = tcpip_protocol->tcpip_proto;
399 iphdr->dest = sin_dest->sin_addr;
401 /* Use routing table to identify next hop and transmitting netdev */
402 next_hop = iphdr->dest;
403 miniroute = ipv4_route ( &next_hop );
405 DBG ( "IPv4 has no route to %s\n", inet_ntoa ( iphdr->dest ) );
409 iphdr->src = miniroute->address;
411 /* Determine link-layer destination address */
412 if ( ( rc = ipv4_ll_addr ( next_hop, iphdr->src, miniroute->netdev,
414 DBG ( "IPv4 has no link-layer address for %s\n",
415 inet_ntoa ( iphdr->dest ) );
419 /* Fix up checksums */
421 *trans_csum = ipv4_pshdr_chksum ( pkb, *trans_csum );
422 iphdr->chksum = tcpip_chksum ( iphdr, sizeof ( *iphdr ) );
424 /* Print IP4 header for debugging */
425 DBG ( "IPv4 TX %s->", inet_ntoa ( iphdr->src ) );
426 DBG ( "%s len %d proto %d id %04x csum %04x\n",
427 inet_ntoa ( iphdr->dest ), ntohs ( iphdr->len ), iphdr->protocol,
428 ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) );
430 /* Hand off to link layer */
431 return net_tx ( pkb, miniroute->netdev, &ipv4_protocol, ll_dest );
439 * Process incoming packets
441 * @v pkb Packet buffer
442 * @v netdev Network device
443 * @v ll_source Link-layer destination source
445 * This function expects an IP4 network datagram. It processes the headers
446 * and sends it to the transport layer.
448 static int ipv4_rx ( struct pk_buff *pkb, struct net_device *netdev __unused,
449 const void *ll_source __unused ) {
450 struct iphdr *iphdr = pkb->data;
454 struct sockaddr_in sin;
455 struct sockaddr_tcpip st;
460 /* Sanity check the IPv4 header */
461 if ( pkb_len ( pkb ) < sizeof ( *iphdr ) ) {
462 DBG ( "IPv4 packet too short at %d bytes (min %d bytes)\n",
463 pkb_len ( pkb ), sizeof ( *iphdr ) );
466 if ( ( iphdr->verhdrlen & IP_MASK_VER ) != IP_VER ) {
467 DBG ( "IPv4 version %#02x not supported\n", iphdr->verhdrlen );
470 hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
471 if ( hdrlen < sizeof ( *iphdr ) ) {
472 DBG ( "IPv4 header too short at %d bytes (min %d bytes)\n",
473 hdrlen, sizeof ( *iphdr ) );
476 if ( hdrlen > pkb_len ( pkb ) ) {
477 DBG ( "IPv4 header too long at %d bytes "
478 "(packet is %d bytes)\n", hdrlen, pkb_len ( pkb ) );
481 if ( ( csum = tcpip_chksum ( iphdr, hdrlen ) ) != 0 ) {
482 DBG ( "IPv4 checksum incorrect (is %04x including checksum "
483 "field, should be 0000)\n", csum );
486 len = ntohs ( iphdr->len );
487 if ( len < hdrlen ) {
488 DBG ( "IPv4 length too short at %d bytes "
489 "(header is %d bytes)\n", len, hdrlen );
492 if ( len > pkb_len ( pkb ) ) {
493 DBG ( "IPv4 length too long at %d bytes "
494 "(packet is %d bytes)\n", len, pkb_len ( pkb ) );
498 /* Print IPv4 header for debugging */
499 DBG ( "IPv4 RX %s<-", inet_ntoa ( iphdr->dest ) );
500 DBG ( "%s len %d proto %d id %04x csum %04x\n",
501 inet_ntoa ( iphdr->src ), ntohs ( iphdr->len ), iphdr->protocol,
502 ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) );
504 /* Truncate packet to correct length, calculate pseudo-header
505 * checksum and then strip off the IPv4 header.
507 pkb_unput ( pkb, ( pkb_len ( pkb ) - len ) );
508 pshdr_csum = ipv4_pshdr_chksum ( pkb, TCPIP_EMPTY_CSUM );
509 pkb_pull ( pkb, hdrlen );
511 /* Fragment reassembly */
512 if ( ( iphdr->frags & htons ( IP_MASK_MOREFRAGS ) ) ||
513 ( ( iphdr->frags & htons ( IP_MASK_OFFSET ) ) != 0 ) ) {
514 /* Pass the fragment to ipv4_reassemble() which either
515 * returns a fully reassembled packet buffer or NULL.
517 pkb = ipv4_reassemble ( pkb );
522 /* Construct socket addresses and hand off to transport layer */
523 memset ( &src, 0, sizeof ( src ) );
524 src.sin.sin_family = AF_INET;
525 src.sin.sin_addr = iphdr->src;
526 memset ( &dest, 0, sizeof ( dest ) );
527 dest.sin.sin_family = AF_INET;
528 dest.sin.sin_addr = iphdr->dest;
529 return tcpip_rx ( pkb, iphdr->protocol, &src.st, &dest.st, pshdr_csum);
537 * Check existence of IPv4 address for ARP
539 * @v netdev Network device
540 * @v net_addr Network-layer address
541 * @ret rc Return status code
543 static int ipv4_arp_check ( struct net_device *netdev, const void *net_addr ) {
544 const struct in_addr *address = net_addr;
545 struct ipv4_miniroute *miniroute;
547 list_for_each_entry ( miniroute, &miniroutes, list ) {
548 if ( ( miniroute->netdev == netdev ) &&
549 ( miniroute->address.s_addr == address->s_addr ) ) {
550 /* Found matching address */
/**
 * Convert IPv4 address to dotted-quad notation
 *
 * @v in		IP address
 * @ret string		IP address in dotted-quad notation
 *
 * The address bytes are printed in memory order.  The result lives in
 * a single static buffer that is overwritten by every call, so it
 * must be consumed (or copied) before inet_ntoa() is called again.
 */
char * inet_ntoa ( struct in_addr in ) {
	static char buf[16]; /* "xxx.xxx.xxx.xxx" */
	uint8_t *bytes = ( uint8_t * ) &in;

	sprintf ( buf, "%d.%d.%d.%d", bytes[0], bytes[1], bytes[2], bytes[3] );
	return buf;
}
572 * Transcribe IP address
574 * @v net_addr IP address
575 * @ret string IP address in dotted-quad notation
578 static const char * ipv4_ntoa ( const void *net_addr ) {
579 return inet_ntoa ( * ( ( struct in_addr * ) net_addr ) );
583 struct net_protocol ipv4_protocol __net_protocol = {
585 .net_proto = htons ( ETH_P_IP ),
586 .net_addr_len = sizeof ( struct in_addr ),
591 /** IPv4 TCPIP net protocol */
592 struct tcpip_net_protocol ipv4_tcpip_protocol __tcpip_net_protocol = {
594 .sa_family = AF_INET,
598 /** IPv4 ARP protocol */
599 struct arp_net_protocol ipv4_arp_protocol __arp_net_protocol = {
600 .net_protocol = &ipv4_protocol,
601 .check = ipv4_arp_check,