#include <errno.h>
#include <gpxe/if_ether.h>
#include <gpxe/pkbuff.h>
#include <gpxe/netdevice.h>
#include <gpxe/tcpip.h>
21 * The gPXE IP stack is currently implemented on top of the uIP
22 * protocol stack. This file provides wrappers around uIP so that
23 * higher-level protocol implementations do not need to talk directly
24 * to uIP (which has a somewhat baroque API).
28 /* Unique IP datagram identification number */
29 static uint16_t next_ident = 0;
31 struct net_protocol ipv4_protocol;
33 /** An IPv4 address/routing table entry */
34 struct ipv4_miniroute {
35 /** List of miniroutes */
36 struct list_head list;
38 struct net_device *netdev;
40 struct in_addr address;
42 struct in_addr netmask;
43 /** Gateway address */
44 struct in_addr gateway;
/** List of IPv4 miniroutes (entries without a gateway are kept first) */
static LIST_HEAD ( miniroutes );

/** List of fragment reassembly buffers */
static LIST_HEAD ( frag_buffers );
56 * @v netdev Network device
57 * @v address IPv4 address
58 * @v netmask Subnet mask
59 * @v gateway Gateway address (or @c INADDR_NONE for no gateway)
60 * @ret rc Return status code
63 int add_ipv4_address ( struct net_device *netdev, struct in_addr address,
64 struct in_addr netmask, struct in_addr gateway ) {
65 struct ipv4_miniroute *miniroute;
67 /* Allocate and populate miniroute structure */
68 miniroute = malloc ( sizeof ( *miniroute ) );
71 miniroute->netdev = netdev;
72 miniroute->address = address;
73 miniroute->netmask = netmask;
74 miniroute->gateway = gateway;
76 /* Add to end of list if we have a gateway, otherwise to start
79 if ( gateway.s_addr != INADDR_NONE ) {
80 list_add_tail ( &miniroute->list, &miniroutes );
82 list_add ( &miniroute->list, &miniroutes );
88 * Remove IPv4 interface
90 * @v netdev Network device
92 void del_ipv4_address ( struct net_device *netdev ) {
93 struct ipv4_miniroute *miniroute;
95 list_for_each_entry ( miniroute, &miniroutes, list ) {
96 if ( miniroute->netdev == netdev ) {
97 list_del ( &miniroute->list );
104 * Dump IPv4 packet header
106 * @v iphdr IPv4 header
108 static void ipv4_dump ( struct iphdr *iphdr __unused ) {
110 DBG ( "IP4 %p transmitting %p+%d ident %d protocol %d header-csum %x\n",
111 &ipv4_protocol, iphdr, ntohs ( iphdr->len ), ntohs ( iphdr->ident ),
112 iphdr->protocol, ntohs ( iphdr->chksum ) );
113 DBG ( "src %s, dest %s\n", inet_ntoa ( iphdr->src ), inet_ntoa ( iphdr->dest ) );
117 * Fragment reassembly counter timeout
119 * @v timer Retry timer
120 * @v over If asserted, the timer is greater than @c MAX_TIMEOUT
122 static void ipv4_frag_expired ( struct retry_timer *timer __unused,
125 DBG ( "Fragment reassembly timeout" );
126 /* Free the fragment buffer */
/**
 * Free fragment buffer
 *
 * @v fragbuf	Fragment buffer
 *
 * Releases only the fragment buffer structure itself.  The caller is
 * responsible for the reassembly packet buffer, the list linkage and
 * the timer.
 */
static void free_fragbuf ( struct frag_buffer *fragbuf ) {
	/* Fragment buffers are obtained from malloc() in
	 * ipv4_reassemble(), so they must go back through free();
	 * free_dma() belongs to a different allocator.
	 */
	if ( fragbuf ) {
		free ( fragbuf );
	}
}
142 * Fragment reassembler
144 * @v pkb Packet buffer, fragment of the datagram
145 * @ret frag_pkb Reassembled packet, or NULL
147 static struct pk_buff * ipv4_reassemble ( struct pk_buff * pkb ) {
148 struct iphdr *iphdr = pkb->data;
149 struct frag_buffer *fragbuf;
152 * Check if the fragment belongs to any fragment series
154 list_for_each_entry ( fragbuf, &frag_buffers, list ) {
155 if ( fragbuf->ident == iphdr->ident &&
156 fragbuf->src.s_addr == iphdr->src.s_addr ) {
158 * Check if the packet is the expected fragment
160 * The offset of the new packet must be equal to the
161 * length of the data accumulated so far (the length of
162 * the reassembled packet buffer
164 if ( pkb_len ( fragbuf->frag_pkb ) ==
165 ( iphdr->frags & IP_MASK_OFFSET ) ) {
167 * Append the contents of the fragment to the
168 * reassembled packet buffer
170 pkb_pull ( pkb, sizeof ( *iphdr ) );
171 memcpy ( pkb_put ( fragbuf->frag_pkb,
173 pkb->data, pkb_len ( pkb ) );
176 /** Check if the fragment series is over */
177 if ( !iphdr->frags & IP_MASK_MOREFRAGS ) {
178 pkb = fragbuf->frag_pkb;
179 free_fragbuf ( fragbuf );
184 /* Discard the fragment series */
185 free_fragbuf ( fragbuf );
192 /** Check if the fragment is the first in the fragment series */
193 if ( iphdr->frags & IP_MASK_MOREFRAGS &&
194 ( ( iphdr->frags & IP_MASK_OFFSET ) == 0 ) ) {
196 /** Create a new fragment buffer */
197 fragbuf = ( struct frag_buffer* ) malloc ( sizeof( *fragbuf ) );
198 fragbuf->ident = iphdr->ident;
199 fragbuf->src = iphdr->src;
201 /* Set up the reassembly packet buffer */
202 fragbuf->frag_pkb = alloc_pkb ( IP_FRAG_PKB_SIZE );
203 pkb_pull ( pkb, sizeof ( *iphdr ) );
204 memcpy ( pkb_put ( fragbuf->frag_pkb, pkb_len ( pkb ) ),
205 pkb->data, pkb_len ( pkb ) );
208 /* Set the reassembly timer */
209 fragbuf->frag_timer.timeout = IP_FRAG_TIMEOUT;
210 fragbuf->frag_timer.expired = ipv4_frag_expired;
211 start_timer ( &fragbuf->frag_timer );
213 /* Add the fragment buffer to the list of fragment buffers */
214 list_add ( &fragbuf->list, &frag_buffers );
222 * Complete the transport-layer checksum
224 * @v pkb Packet buffer
225 * @v tcpip Transport-layer protocol
227 * This function calculates the tcpip
229 static void ipv4_tx_csum ( struct pk_buff *pkb,
230 struct tcpip_protocol *tcpip ) {
231 struct iphdr *iphdr = pkb->data;
232 struct ipv4_pseudo_header pshdr;
233 uint16_t *csum = ( ( ( void * ) iphdr ) + sizeof ( *iphdr )
234 + tcpip->csum_offset );
236 /* Calculate pseudo header */
237 pshdr.src = iphdr->src;
238 pshdr.dest = iphdr->dest;
239 pshdr.zero_padding = 0x00;
240 pshdr.protocol = iphdr->protocol;
241 /* This is only valid when IPv4 does not have options */
242 pshdr.len = htons ( pkb_len ( pkb ) - sizeof ( *iphdr ) );
244 /* Update the checksum value */
245 *csum = tcpip_continue_chksum ( *csum, &pshdr, sizeof ( pshdr ) );
249 * Calculate the transport-layer checksum while processing packets
251 static uint16_t ipv4_rx_csum ( struct pk_buff *pkb __unused,
252 uint8_t trans_proto __unused ) {
254 * This function needs to be implemented. Until then, it will return
255 * 0xffffffff every time
263 * @v pkb Packet buffer
264 * @v tcpip Transport-layer protocol
265 * @v st_dest Destination network-layer address
268 * This function expects a transport-layer segment and prepends the IP header
270 static int ipv4_tx ( struct pk_buff *pkb,
271 struct tcpip_protocol *tcpip_protocol,
272 struct sockaddr_tcpip *st_dest ) {
273 struct iphdr *iphdr = pkb_push ( pkb, sizeof ( *iphdr ) );
274 struct sockaddr_in *sin_dest = ( ( struct sockaddr_in * ) st_dest );
275 struct ipv4_miniroute *miniroute;
276 struct net_device *netdev = NULL;
277 struct in_addr next_hop;
278 uint8_t ll_dest_buf[MAX_LL_ADDR_LEN];
279 const uint8_t *ll_dest = ll_dest_buf;
282 /* Fill up the IP header, except source address */
283 iphdr->verhdrlen = ( IP_VER << 4 ) | ( sizeof ( *iphdr ) / 4 );
284 iphdr->service = IP_TOS;
285 iphdr->len = htons ( pkb_len ( pkb ) );
286 iphdr->ident = htons ( next_ident++ );
289 iphdr->protocol = tcpip_protocol->tcpip_proto;
291 /* Copy destination address */
292 iphdr->dest = sin_dest->sin_addr;
295 * All fields in the IP header filled in except the source network
296 * address (which requires routing) and the header checksum (which
297 * requires the source network address). As the pseudo header requires
298 * the source address as well and the transport-layer checksum is
299 * updated after routing.
302 /* Use routing table to identify next hop and transmitting netdev */
303 next_hop = iphdr->dest;
304 list_for_each_entry ( miniroute, &miniroutes, list ) {
307 local = ( ( ( iphdr->dest.s_addr ^ miniroute->address.s_addr )
308 & miniroute->netmask.s_addr ) == 0 );
309 has_gw = ( miniroute->gateway.s_addr != INADDR_NONE );
310 if ( local || has_gw ) {
311 netdev = miniroute->netdev;
312 iphdr->src = miniroute->address;
314 next_hop = miniroute->gateway;
318 /* Abort if no network device identified */
320 DBG ( "No route to %s\n", inet_ntoa ( iphdr->dest ) );
325 /* Calculate the transport layer checksum */
326 if ( tcpip_protocol->csum_offset > 0 ) {
327 ipv4_tx_csum ( pkb, tcpip_protocol );
330 /* Calculate header checksum, in network byte order */
332 iphdr->chksum = tcpip_chksum ( iphdr, sizeof ( *iphdr ) );
334 /* Print IP4 header for debugging */
337 /* Determine link-layer destination address */
338 if ( next_hop.s_addr == INADDR_BROADCAST ) {
339 /* Broadcast address */
340 ll_dest = netdev->ll_protocol->ll_broadcast;
341 } else if ( IN_MULTICAST ( next_hop.s_addr ) ) {
342 /* Special case: IPv4 multicast over Ethernet. This
343 * code may need to be generalised once we find out
344 * what happens for other link layers.
346 uint8_t *next_hop_bytes = ( uint8_t * ) &next_hop;
347 ll_dest_buf[0] = 0x01;
348 ll_dest_buf[0] = 0x00;
349 ll_dest_buf[0] = 0x5e;
350 ll_dest_buf[3] = next_hop_bytes[1] & 0x7f;
351 ll_dest_buf[4] = next_hop_bytes[2];
352 ll_dest_buf[5] = next_hop_bytes[3];
354 /* Unicast address: resolve via ARP */
355 if ( ( rc = arp_resolve ( netdev, &ipv4_protocol, &next_hop,
356 &iphdr->src, ll_dest_buf ) ) != 0 ) {
357 DBG ( "No ARP entry for %s\n",
358 inet_ntoa ( iphdr->dest ) );
363 /* Hand off to link layer */
364 return net_tx ( pkb, netdev, &ipv4_protocol, ll_dest );
372 * Process incoming packets
374 * @v pkb Packet buffer
375 * @v netdev Network device
376 * @v ll_source Link-layer destination source
378 * This function expects an IP4 network datagram. It processes the headers
379 * and sends it to the transport layer.
381 static int ipv4_rx ( struct pk_buff *pkb, struct net_device *netdev __unused,
382 const void *ll_source __unused ) {
383 struct iphdr *iphdr = pkb->data;
385 struct sockaddr_in sin;
386 struct sockaddr_tcpip st;
391 if ( pkb_len ( pkb ) < sizeof ( *iphdr ) ) {
392 DBG ( "IP datagram too short (%d bytes)\n",
397 /* Print IP4 header for debugging */
400 /* Validate version and header length */
401 if ( iphdr->verhdrlen != 0x45 ) {
402 DBG ( "Bad version and header length %x\n", iphdr->verhdrlen );
406 /* Validate length of IP packet */
407 if ( ntohs ( iphdr->len ) > pkb_len ( pkb ) ) {
408 DBG ( "Inconsistent packet length %d\n",
409 ntohs ( iphdr->len ) );
413 /* Verify the checksum */
414 if ( ( chksum = ipv4_rx_csum ( pkb, iphdr->protocol ) ) != 0xffff ) {
415 DBG ( "Bad checksum %x\n", chksum );
417 /* Fragment reassembly */
418 if ( iphdr->frags & IP_MASK_MOREFRAGS ||
419 ( !iphdr->frags & IP_MASK_MOREFRAGS &&
420 iphdr->frags & IP_MASK_OFFSET != 0 ) ) {
421 /* Pass the fragment to the reassembler ipv4_ressable() which
422 * either returns a fully reassembled packet buffer or NULL.
424 pkb = ipv4_reassemble ( pkb );
430 /* To reduce code size, the following functions are not implemented:
431 * 1. Check the destination address
432 * 2. Check the TTL field
433 * 3. Check the service field
436 /* Construct socket addresses */
437 memset ( &src, 0, sizeof ( src ) );
438 src.sin.sin_family = AF_INET;
439 src.sin.sin_addr = iphdr->src;
440 memset ( &dest, 0, sizeof ( dest ) );
441 dest.sin.sin_family = AF_INET;
442 dest.sin.sin_addr = iphdr->dest;
445 pkb_pull ( pkb, sizeof ( *iphdr ) );
446 pkb_unput ( pkb, pkb_len ( pkb ) - htons ( iphdr->len ) );
448 /* Send it to the transport layer */
449 return tcpip_rx ( pkb, iphdr->protocol, &src.st, &dest.st );
453 * Check existence of IPv4 address for ARP
455 * @v netdev Network device
456 * @v net_addr Network-layer address
457 * @ret rc Return status code
459 static int ipv4_arp_check ( struct net_device *netdev, const void *net_addr ) {
460 const struct in_addr *address = net_addr;
461 struct ipv4_miniroute *miniroute;
463 list_for_each_entry ( miniroute, &miniroutes, list ) {
464 if ( ( miniroute->netdev == netdev ) &&
465 ( miniroute->address.s_addr == address->s_addr ) ) {
466 /* Found matching address */
/**
 * Convert IPv4 address to dotted-quad notation
 *
 * @v in	IP address
 * @ret string	IP address in dotted-quad notation
 *
 * The string is held in a static buffer that is overwritten by the
 * next call, so the result must be used or copied before calling
 * this function again.
 */
char * inet_ntoa ( struct in_addr in ) {
	static char buf[16]; /* "xxx.xxx.xxx.xxx" */
	const uint8_t *bytes = ( const uint8_t * ) &in;

	sprintf ( buf, "%d.%d.%d.%d", bytes[0], bytes[1], bytes[2], bytes[3] );
	return buf;
}
/**
 * Transcribe IP address
 *
 * @v net_addr	IP address
 * @ret string	IP address in dotted-quad notation
 *
 * Thin wrapper around inet_ntoa() for callers that hold the address
 * behind an untyped pointer.  The returned string is whatever
 * inet_ntoa() returns.
 */
static const char * ipv4_ntoa ( const void *net_addr ) {
	/* Keep the const qualifier: the address is only read */
	const struct in_addr *address = net_addr;

	return inet_ntoa ( *address );
}
499 struct net_protocol ipv4_protocol = {
501 .net_proto = htons ( ETH_P_IP ),
502 .net_addr_len = sizeof ( struct in_addr ),
507 NET_PROTOCOL ( ipv4_protocol );
509 /** IPv4 TCPIP net protocol */
510 struct tcpip_net_protocol ipv4_tcpip_protocol = {
512 .sa_family = AF_INET,
516 TCPIP_NET_PROTOCOL ( ipv4_tcpip_protocol );
518 /** IPv4 ARP protocol */
519 struct arp_net_protocol ipv4_arp_protocol __arp_net_protocol = {
520 .net_protocol = &ipv4_protocol,
521 .check = ipv4_arp_check,