10 #include <gpxe/if_ether.h>
11 #include <gpxe/pkbuff.h>
12 #include <gpxe/netdevice.h>
14 #include <gpxe/tcpip.h>
22 /* Unique IP datagram identification number */
23 static uint16_t next_ident = 0;
25 struct net_protocol ipv4_protocol;
27 /** List of IPv4 miniroutes */
28 struct list_head ipv4_miniroutes = LIST_HEAD_INIT ( ipv4_miniroutes );
30 /** List of fragment reassembly buffers */
31 static LIST_HEAD ( frag_buffers );
33 static void ipv4_forget_netdev ( struct reference *ref );
36 * Add IPv4 minirouting table entry
38 * @v netdev Network device
39 * @v address IPv4 address
40 * @v netmask Subnet mask
41 * @v gateway Gateway address (or @c INADDR_NONE for no gateway)
42 * @ret miniroute Routing table entry, or NULL
44 static struct ipv4_miniroute * add_ipv4_miniroute ( struct net_device *netdev,
45 struct in_addr address,
46 struct in_addr netmask,
47 struct in_addr gateway ) {
48 struct ipv4_miniroute *miniroute;
50 DBG ( "IPv4 add %s", inet_ntoa ( address ) );
51 DBG ( "/%s ", inet_ntoa ( netmask ) );
52 if ( gateway.s_addr != INADDR_NONE )
53 DBG ( "gw %s ", inet_ntoa ( gateway ) );
54 DBG ( "via %s\n", netdev->name );
56 /* Allocate and populate miniroute structure */
57 miniroute = malloc ( sizeof ( *miniroute ) );
59 DBG ( "IPv4 could not add miniroute\n" );
63 /* Record routing information */
64 miniroute->netdev = netdev;
65 miniroute->address = address;
66 miniroute->netmask = netmask;
67 miniroute->gateway = gateway;
69 /* Add to end of list if we have a gateway, otherwise
72 if ( gateway.s_addr != INADDR_NONE ) {
73 list_add_tail ( &miniroute->list, &ipv4_miniroutes );
75 list_add ( &miniroute->list, &ipv4_miniroutes );
78 /* Record reference to net_device */
79 miniroute->netdev_ref.forget = ipv4_forget_netdev;
80 ref_add ( &miniroute->netdev_ref, &netdev->references );
86 * Delete IPv4 minirouting table entry
88 * @v miniroute Routing table entry
90 static void del_ipv4_miniroute ( struct ipv4_miniroute *miniroute ) {
92 DBG ( "IPv4 del %s", inet_ntoa ( miniroute->address ) );
93 DBG ( "/%s ", inet_ntoa ( miniroute->netmask ) );
94 if ( miniroute->gateway.s_addr != INADDR_NONE )
95 DBG ( "gw %s ", inet_ntoa ( miniroute->gateway ) );
96 DBG ( "via %s\n", miniroute->netdev->name );
98 ref_del ( &miniroute->netdev_ref );
99 list_del ( &miniroute->list );
104 * Forget reference to net_device
106 * @v ref Persistent reference
108 static void ipv4_forget_netdev ( struct reference *ref ) {
109 struct ipv4_miniroute *miniroute
110 = container_of ( ref, struct ipv4_miniroute, netdev_ref );
112 del_ipv4_miniroute ( miniroute );
118 * @v netdev Network device
119 * @v address IPv4 address
120 * @v netmask Subnet mask
121 * @v gateway Gateway address (or @c INADDR_NONE for no gateway)
122 * @ret rc Return status code
125 int add_ipv4_address ( struct net_device *netdev, struct in_addr address,
126 struct in_addr netmask, struct in_addr gateway ) {
127 struct ipv4_miniroute *miniroute;
129 /* Clear any existing address for this net device */
130 del_ipv4_address ( netdev );
132 /* Add new miniroute */
133 miniroute = add_ipv4_miniroute ( netdev, address, netmask, gateway );
141 * Remove IPv4 interface
143 * @v netdev Network device
145 void del_ipv4_address ( struct net_device *netdev ) {
146 struct ipv4_miniroute *miniroute;
148 list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) {
149 if ( miniroute->netdev == netdev ) {
150 del_ipv4_miniroute ( miniroute );
157 * Perform IPv4 routing
159 * @v dest Final destination address
160 * @ret dest Next hop destination address
161 * @ret miniroute Routing table entry to use, or NULL if no route
163 static struct ipv4_miniroute * ipv4_route ( struct in_addr *dest ) {
164 struct ipv4_miniroute *miniroute;
168 list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) {
169 local = ( ( ( dest->s_addr ^ miniroute->address.s_addr )
170 & miniroute->netmask.s_addr ) == 0 );
171 has_gw = ( miniroute->gateway.s_addr != INADDR_NONE );
172 if ( local || has_gw ) {
174 *dest = miniroute->gateway;
183 * Fragment reassembly counter timeout
185 * @v timer Retry timer
186 * @v over If asserted, the timer is greater than @c MAX_TIMEOUT
188 static void ipv4_frag_expired ( struct retry_timer *timer __unused,
191 DBG ( "Fragment reassembly timeout" );
192 /* Free the fragment buffer */
/**
 * Free fragment buffer
 *
 * @v fragbuf	Fragment buffer, or NULL
 *
 * Releases only the @c frag_buffer structure itself.  The reassembly
 * packet buffer it points to is not freed here, because
 * ipv4_reassemble() hands that buffer on to its caller.
 */
static void free_fragbuf ( struct frag_buffer *fragbuf ) {
	if ( fragbuf ) {
		/* Fragment buffers are allocated with malloc() in
		 * ipv4_reassemble(), so they must be released with
		 * free() rather than free_dma().
		 */
		free ( fragbuf );
	}
}
208 * Fragment reassembler
210 * @v pkb Packet buffer, fragment of the datagram
211 * @ret frag_pkb Reassembled packet, or NULL
213 static struct pk_buff * ipv4_reassemble ( struct pk_buff * pkb ) {
214 struct iphdr *iphdr = pkb->data;
215 struct frag_buffer *fragbuf;
218 * Check if the fragment belongs to any fragment series
220 list_for_each_entry ( fragbuf, &frag_buffers, list ) {
221 if ( fragbuf->ident == iphdr->ident &&
222 fragbuf->src.s_addr == iphdr->src.s_addr ) {
224 * Check if the packet is the expected fragment
226 * The offset of the new packet must be equal to the
227 * length of the data accumulated so far (the length of
228 * the reassembled packet buffer
230 if ( pkb_len ( fragbuf->frag_pkb ) ==
231 ( iphdr->frags & IP_MASK_OFFSET ) ) {
233 * Append the contents of the fragment to the
234 * reassembled packet buffer
236 pkb_pull ( pkb, sizeof ( *iphdr ) );
237 memcpy ( pkb_put ( fragbuf->frag_pkb,
239 pkb->data, pkb_len ( pkb ) );
242 /** Check if the fragment series is over */
243 if ( !iphdr->frags & IP_MASK_MOREFRAGS ) {
244 pkb = fragbuf->frag_pkb;
245 free_fragbuf ( fragbuf );
250 /* Discard the fragment series */
251 free_fragbuf ( fragbuf );
258 /** Check if the fragment is the first in the fragment series */
259 if ( iphdr->frags & IP_MASK_MOREFRAGS &&
260 ( ( iphdr->frags & IP_MASK_OFFSET ) == 0 ) ) {
262 /** Create a new fragment buffer */
263 fragbuf = ( struct frag_buffer* ) malloc ( sizeof( *fragbuf ) );
264 fragbuf->ident = iphdr->ident;
265 fragbuf->src = iphdr->src;
267 /* Set up the reassembly packet buffer */
268 fragbuf->frag_pkb = alloc_pkb ( IP_FRAG_PKB_SIZE );
269 pkb_pull ( pkb, sizeof ( *iphdr ) );
270 memcpy ( pkb_put ( fragbuf->frag_pkb, pkb_len ( pkb ) ),
271 pkb->data, pkb_len ( pkb ) );
274 /* Set the reassembly timer */
275 fragbuf->frag_timer.timeout = IP_FRAG_TIMEOUT;
276 fragbuf->frag_timer.expired = ipv4_frag_expired;
277 start_timer ( &fragbuf->frag_timer );
279 /* Add the fragment buffer to the list of fragment buffers */
280 list_add ( &fragbuf->list, &frag_buffers );
287 * Add IPv4 pseudo-header checksum to existing checksum
289 * @v pkb Packet buffer
290 * @v csum Existing checksum
291 * @ret csum Updated checksum
293 static uint16_t ipv4_pshdr_chksum ( struct pk_buff *pkb, uint16_t csum ) {
294 struct ipv4_pseudo_header pshdr;
295 struct iphdr *iphdr = pkb->data;
296 size_t hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
298 /* Build pseudo-header */
299 pshdr.src = iphdr->src;
300 pshdr.dest = iphdr->dest;
301 pshdr.zero_padding = 0x00;
302 pshdr.protocol = iphdr->protocol;
303 pshdr.len = htons ( pkb_len ( pkb ) - hdrlen );
305 /* Update the checksum value */
306 return tcpip_continue_chksum ( csum, &pshdr, sizeof ( pshdr ) );
310 * Determine link-layer address
312 * @v dest IPv4 destination address
313 * @v src IPv4 source address
314 * @v netdev Network device
315 * @v ll_dest Link-layer destination address buffer
316 * @ret rc Return status code
318 static int ipv4_ll_addr ( struct in_addr dest, struct in_addr src,
319 struct net_device *netdev, uint8_t *ll_dest ) {
320 struct ll_protocol *ll_protocol = netdev->ll_protocol;
321 uint8_t *dest_bytes = ( ( uint8_t * ) &dest );
323 if ( dest.s_addr == INADDR_BROADCAST ) {
324 /* Broadcast address */
325 memcpy ( ll_dest, ll_protocol->ll_broadcast,
326 ll_protocol->ll_addr_len );
328 } else if ( IN_MULTICAST ( dest.s_addr ) ) {
329 /* Special case: IPv4 multicast over Ethernet. This
330 * code may need to be generalised once we find out
331 * what happens for other link layers.
336 ll_dest[3] = dest_bytes[1] & 0x7f;
337 ll_dest[4] = dest_bytes[2];
338 ll_dest[5] = dest_bytes[3];
341 /* Unicast address: resolve via ARP */
342 return arp_resolve ( netdev, &ipv4_protocol, &dest,
350 * @v pkb Packet buffer
351 * @v tcpip Transport-layer protocol
352 * @v st_dest Destination network-layer address
353 * @v netdev Network device to use if no route found, or NULL
354 * @v trans_csum Transport-layer checksum to complete, or NULL
357 * This function expects a transport-layer segment and prepends the IP header
359 static int ipv4_tx ( struct pk_buff *pkb,
360 struct tcpip_protocol *tcpip_protocol,
361 struct sockaddr_tcpip *st_dest,
362 struct net_device *netdev,
363 uint16_t *trans_csum ) {
364 struct iphdr *iphdr = pkb_push ( pkb, sizeof ( *iphdr ) );
365 struct sockaddr_in *sin_dest = ( ( struct sockaddr_in * ) st_dest );
366 struct ipv4_miniroute *miniroute;
367 struct in_addr next_hop;
368 uint8_t ll_dest[MAX_LL_ADDR_LEN];
371 /* Fill up the IP header, except source address */
372 memset ( iphdr, 0, sizeof ( *iphdr ) );
373 iphdr->verhdrlen = ( IP_VER | ( sizeof ( *iphdr ) / 4 ) );
374 iphdr->service = IP_TOS;
375 iphdr->len = htons ( pkb_len ( pkb ) );
376 iphdr->ident = htons ( ++next_ident );
378 iphdr->protocol = tcpip_protocol->tcpip_proto;
379 iphdr->dest = sin_dest->sin_addr;
381 /* Use routing table to identify next hop and transmitting netdev */
382 next_hop = iphdr->dest;
383 if ( ( miniroute = ipv4_route ( &next_hop ) ) ) {
384 iphdr->src = miniroute->address;
385 netdev = miniroute->netdev;
388 DBG ( "IPv4 has no route to %s\n", inet_ntoa ( iphdr->dest ) );
393 /* Determine link-layer destination address */
394 if ( ( rc = ipv4_ll_addr ( next_hop, iphdr->src, netdev,
396 DBG ( "IPv4 has no link-layer address for %s: %s\n",
397 inet_ntoa ( next_hop ), strerror ( rc ) );
401 /* Fix up checksums */
403 *trans_csum = ipv4_pshdr_chksum ( pkb, *trans_csum );
404 iphdr->chksum = tcpip_chksum ( iphdr, sizeof ( *iphdr ) );
406 /* Print IP4 header for debugging */
407 DBG ( "IPv4 TX %s->", inet_ntoa ( iphdr->src ) );
408 DBG ( "%s len %d proto %d id %04x csum %04x\n",
409 inet_ntoa ( iphdr->dest ), ntohs ( iphdr->len ), iphdr->protocol,
410 ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) );
412 /* Hand off to link layer */
413 if ( ( rc = net_tx ( pkb, netdev, &ipv4_protocol, ll_dest ) ) != 0 ) {
414 DBG ( "IPv4 could not transmit packet via %s: %s\n",
415 netdev->name, strerror ( rc ) );
427 * Process incoming packets
429 * @v pkb Packet buffer
430 * @v netdev Network device
431 * @v ll_source Link-layer destination source
433 * This function expects an IP4 network datagram. It processes the headers
434 * and sends it to the transport layer.
436 static int ipv4_rx ( struct pk_buff *pkb, struct net_device *netdev __unused,
437 const void *ll_source __unused ) {
438 struct iphdr *iphdr = pkb->data;
442 struct sockaddr_in sin;
443 struct sockaddr_tcpip st;
449 /* Sanity check the IPv4 header */
450 if ( pkb_len ( pkb ) < sizeof ( *iphdr ) ) {
451 DBG ( "IPv4 packet too short at %d bytes (min %d bytes)\n",
452 pkb_len ( pkb ), sizeof ( *iphdr ) );
455 if ( ( iphdr->verhdrlen & IP_MASK_VER ) != IP_VER ) {
456 DBG ( "IPv4 version %#02x not supported\n", iphdr->verhdrlen );
459 hdrlen = ( ( iphdr->verhdrlen & IP_MASK_HLEN ) * 4 );
460 if ( hdrlen < sizeof ( *iphdr ) ) {
461 DBG ( "IPv4 header too short at %d bytes (min %d bytes)\n",
462 hdrlen, sizeof ( *iphdr ) );
465 if ( hdrlen > pkb_len ( pkb ) ) {
466 DBG ( "IPv4 header too long at %d bytes "
467 "(packet is %d bytes)\n", hdrlen, pkb_len ( pkb ) );
470 if ( ( csum = tcpip_chksum ( iphdr, hdrlen ) ) != 0 ) {
471 DBG ( "IPv4 checksum incorrect (is %04x including checksum "
472 "field, should be 0000)\n", csum );
475 len = ntohs ( iphdr->len );
476 if ( len < hdrlen ) {
477 DBG ( "IPv4 length too short at %d bytes "
478 "(header is %d bytes)\n", len, hdrlen );
481 if ( len > pkb_len ( pkb ) ) {
482 DBG ( "IPv4 length too long at %d bytes "
483 "(packet is %d bytes)\n", len, pkb_len ( pkb ) );
487 /* Print IPv4 header for debugging */
488 DBG ( "IPv4 RX %s<-", inet_ntoa ( iphdr->dest ) );
489 DBG ( "%s len %d proto %d id %04x csum %04x\n",
490 inet_ntoa ( iphdr->src ), ntohs ( iphdr->len ), iphdr->protocol,
491 ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) );
493 /* Truncate packet to correct length, calculate pseudo-header
494 * checksum and then strip off the IPv4 header.
496 pkb_unput ( pkb, ( pkb_len ( pkb ) - len ) );
497 pshdr_csum = ipv4_pshdr_chksum ( pkb, TCPIP_EMPTY_CSUM );
498 pkb_pull ( pkb, hdrlen );
500 /* Fragment reassembly */
501 if ( ( iphdr->frags & htons ( IP_MASK_MOREFRAGS ) ) ||
502 ( ( iphdr->frags & htons ( IP_MASK_OFFSET ) ) != 0 ) ) {
503 /* Pass the fragment to ipv4_reassemble() which either
504 * returns a fully reassembled packet buffer or NULL.
506 pkb = ipv4_reassemble ( pkb );
511 /* Construct socket addresses and hand off to transport layer */
512 memset ( &src, 0, sizeof ( src ) );
513 src.sin.sin_family = AF_INET;
514 src.sin.sin_addr = iphdr->src;
515 memset ( &dest, 0, sizeof ( dest ) );
516 dest.sin.sin_family = AF_INET;
517 dest.sin.sin_addr = iphdr->dest;
518 if ( ( rc = tcpip_rx ( pkb, iphdr->protocol, &src.st,
519 &dest.st, pshdr_csum ) ) != 0 ) {
520 DBG ( "IPv4 received packet rejected by stack: %s\n",
533 * Check existence of IPv4 address for ARP
535 * @v netdev Network device
536 * @v net_addr Network-layer address
537 * @ret rc Return status code
539 static int ipv4_arp_check ( struct net_device *netdev, const void *net_addr ) {
540 const struct in_addr *address = net_addr;
541 struct ipv4_miniroute *miniroute;
543 list_for_each_entry ( miniroute, &ipv4_miniroutes, list ) {
544 if ( ( miniroute->netdev == netdev ) &&
545 ( miniroute->address.s_addr == address->s_addr ) ) {
546 /* Found matching address */
/**
 * Convert IPv4 address to dotted-quad notation
 *
 * @v in	IP address
 * @ret string	IP address in dotted-quad notation
 *
 * The result lives in a single static buffer that is overwritten by
 * every call, so it must be consumed before inet_ntoa() is called
 * again.
 */
char * inet_ntoa ( struct in_addr in ) {
	static char buf[16]; /* "xxx.xxx.xxx.xxx" */
	/* Address bytes are printed in the order they are stored */
	uint8_t *bytes = ( uint8_t * ) &in;

	sprintf ( buf, "%d.%d.%d.%d", bytes[0], bytes[1], bytes[2], bytes[3] );
	return buf;
}
/**
 * Transcribe IP address
 *
 * @v net_addr	IP address
 * @ret string	IP address in dotted-quad notation
 *
 * Thin wrapper around inet_ntoa(); @c net_addr must point to a
 * struct in_addr.
 */
static const char * ipv4_ntoa ( const void *net_addr ) {
	const struct in_addr *address = net_addr;

	return inet_ntoa ( *address );
}
579 struct net_protocol ipv4_protocol __net_protocol = {
581 .net_proto = htons ( ETH_P_IP ),
582 .net_addr_len = sizeof ( struct in_addr ),
587 /** IPv4 TCPIP net protocol */
588 struct tcpip_net_protocol ipv4_tcpip_protocol __tcpip_net_protocol = {
590 .sa_family = AF_INET,
594 /** IPv4 ARP protocol */
595 struct arp_net_protocol ipv4_arp_protocol __arp_net_protocol = {
596 .net_protocol = &ipv4_protocol,
597 .check = ipv4_arp_check,