#include <errno.h>
#include <gpxe/if_ether.h>
#include <gpxe/pkbuff.h>
#include <gpxe/netdevice.h>
#include <gpxe/tcpip.h>
/** @file
 *
 * IPv4 protocol
 *
 * The gPXE IP stack is currently implemented on top of the uIP
 * protocol stack.  This file provides wrappers around uIP so that
 * higher-level protocol implementations do not need to talk directly
 * to uIP (which has a somewhat baroque API).
 *
 */
28 /* Unique IP datagram identification number */
29 static uint16_t next_ident = 0;
31 struct net_protocol ipv4_protocol;
33 /** An IPv4 address/routing table entry */
34 struct ipv4_miniroute {
35 /** List of miniroutes */
36 struct list_head list;
38 struct net_device *netdev;
40 struct in_addr address;
42 struct in_addr netmask;
43 /** Gateway address */
44 struct in_addr gateway;
/** Routing table: list of IPv4 miniroutes, searched in order */
static LIST_HEAD ( miniroutes );

/** Fragment reassembly buffers for datagrams still being collected */
static LIST_HEAD ( frag_buffers );
56 * @v netdev Network device
57 * @v address IPv4 address
58 * @v netmask Subnet mask
59 * @v gateway Gateway address (or @c INADDR_NONE for no gateway)
60 * @ret rc Return status code
63 int add_ipv4_address ( struct net_device *netdev, struct in_addr address,
64 struct in_addr netmask, struct in_addr gateway ) {
65 struct ipv4_miniroute *miniroute;
67 /* Allocate and populate miniroute structure */
68 miniroute = malloc ( sizeof ( *miniroute ) );
71 miniroute->netdev = netdev;
72 miniroute->address = address;
73 miniroute->netmask = netmask;
74 miniroute->gateway = gateway;
76 /* Add to end of list if we have a gateway, otherwise to start
79 if ( gateway.s_addr != INADDR_NONE ) {
80 list_add_tail ( &miniroute->list, &miniroutes );
82 list_add ( &miniroute->list, &miniroutes );
88 * Remove IPv4 interface
90 * @v netdev Network device
92 void del_ipv4_address ( struct net_device *netdev ) {
93 struct ipv4_miniroute *miniroute;
95 list_for_each_entry ( miniroute, &miniroutes, list ) {
96 if ( miniroute->netdev == netdev ) {
97 list_del ( &miniroute->list );
104 * Perform IPv4 routing
106 * @v dest Final destination address
107 * @ret dest Next hop destination address
108 * @ret miniroute Routing table entry to use, or NULL if no route
110 static struct ipv4_miniroute * ipv4_route ( struct in_addr *dest ) {
111 struct ipv4_miniroute *miniroute;
115 list_for_each_entry ( miniroute, &miniroutes, list ) {
116 local = ( ( ( dest->s_addr ^ miniroute->address.s_addr )
117 & miniroute->netmask.s_addr ) == 0 );
118 has_gw = ( miniroute->gateway.s_addr != INADDR_NONE );
119 if ( local || has_gw ) {
121 *dest = miniroute->gateway;
130 * Fragment reassembly counter timeout
132 * @v timer Retry timer
133 * @v over If asserted, the timer is greater than @c MAX_TIMEOUT
135 static void ipv4_frag_expired ( struct retry_timer *timer __unused,
138 DBG ( "Fragment reassembly timeout" );
139 /* Free the fragment buffer */
/**
 * Free fragment buffer
 *
 * @v fragbuf	Fragment buffer (may be NULL)
 *
 * Releases only the @c frag_buffer structure itself.  The caller is
 * responsible for the accumulated packet buffer, the list linkage and
 * the reassembly timer.
 */
static void free_fragbuf ( struct frag_buffer *fragbuf ) {
	/* Fragment buffers are obtained from malloc() in
	 * ipv4_reassemble(), so they must be returned through free()
	 * rather than free_dma().
	 */
	free ( fragbuf );
}
155 * Fragment reassembler
157 * @v pkb Packet buffer, fragment of the datagram
158 * @ret frag_pkb Reassembled packet, or NULL
160 static struct pk_buff * ipv4_reassemble ( struct pk_buff * pkb ) {
161 struct iphdr *iphdr = pkb->data;
162 struct frag_buffer *fragbuf;
165 * Check if the fragment belongs to any fragment series
167 list_for_each_entry ( fragbuf, &frag_buffers, list ) {
168 if ( fragbuf->ident == iphdr->ident &&
169 fragbuf->src.s_addr == iphdr->src.s_addr ) {
171 * Check if the packet is the expected fragment
173 * The offset of the new packet must be equal to the
174 * length of the data accumulated so far (the length of
175 * the reassembled packet buffer
177 if ( pkb_len ( fragbuf->frag_pkb ) ==
178 ( iphdr->frags & IP_MASK_OFFSET ) ) {
180 * Append the contents of the fragment to the
181 * reassembled packet buffer
183 pkb_pull ( pkb, sizeof ( *iphdr ) );
184 memcpy ( pkb_put ( fragbuf->frag_pkb,
186 pkb->data, pkb_len ( pkb ) );
189 /** Check if the fragment series is over */
190 if ( !iphdr->frags & IP_MASK_MOREFRAGS ) {
191 pkb = fragbuf->frag_pkb;
192 free_fragbuf ( fragbuf );
197 /* Discard the fragment series */
198 free_fragbuf ( fragbuf );
205 /** Check if the fragment is the first in the fragment series */
206 if ( iphdr->frags & IP_MASK_MOREFRAGS &&
207 ( ( iphdr->frags & IP_MASK_OFFSET ) == 0 ) ) {
209 /** Create a new fragment buffer */
210 fragbuf = ( struct frag_buffer* ) malloc ( sizeof( *fragbuf ) );
211 fragbuf->ident = iphdr->ident;
212 fragbuf->src = iphdr->src;
214 /* Set up the reassembly packet buffer */
215 fragbuf->frag_pkb = alloc_pkb ( IP_FRAG_PKB_SIZE );
216 pkb_pull ( pkb, sizeof ( *iphdr ) );
217 memcpy ( pkb_put ( fragbuf->frag_pkb, pkb_len ( pkb ) ),
218 pkb->data, pkb_len ( pkb ) );
221 /* Set the reassembly timer */
222 fragbuf->frag_timer.timeout = IP_FRAG_TIMEOUT;
223 fragbuf->frag_timer.expired = ipv4_frag_expired;
224 start_timer ( &fragbuf->frag_timer );
226 /* Add the fragment buffer to the list of fragment buffers */
227 list_add ( &fragbuf->list, &frag_buffers );
235 * Complete the transport-layer checksum
237 * @v pkb Packet buffer
238 * @v tcpip Transport-layer protocol
240 * This function calculates the tcpip
242 static void ipv4_tx_csum ( struct pk_buff *pkb,
243 struct tcpip_protocol *tcpip ) {
244 struct iphdr *iphdr = pkb->data;
245 struct ipv4_pseudo_header pshdr;
246 uint16_t *csum = ( ( ( void * ) iphdr ) + sizeof ( *iphdr )
247 + tcpip->csum_offset );
249 /* Calculate pseudo header */
250 pshdr.src = iphdr->src;
251 pshdr.dest = iphdr->dest;
252 pshdr.zero_padding = 0x00;
253 pshdr.protocol = iphdr->protocol;
254 /* This is only valid when IPv4 does not have options */
255 pshdr.len = htons ( pkb_len ( pkb ) - sizeof ( *iphdr ) );
257 /* Update the checksum value */
258 *csum = tcpip_continue_chksum ( *csum, &pshdr, sizeof ( pshdr ) );
262 * Calculate the transport-layer checksum while processing packets
264 static uint16_t ipv4_rx_csum ( struct pk_buff *pkb __unused,
265 uint8_t trans_proto __unused ) {
267 * This function needs to be implemented. Until then, it will return
268 * 0xffffffff every time
276 * @v pkb Packet buffer
277 * @v tcpip Transport-layer protocol
278 * @v st_dest Destination network-layer address
281 * This function expects a transport-layer segment and prepends the IP header
283 static int ipv4_tx ( struct pk_buff *pkb,
284 struct tcpip_protocol *tcpip_protocol,
285 struct sockaddr_tcpip *st_dest ) {
286 struct iphdr *iphdr = pkb_push ( pkb, sizeof ( *iphdr ) );
287 struct sockaddr_in *sin_dest = ( ( struct sockaddr_in * ) st_dest );
288 struct ipv4_miniroute *miniroute;
289 struct net_device *netdev = NULL;
290 struct in_addr next_hop;
291 uint8_t ll_dest_buf[MAX_LL_ADDR_LEN];
292 const uint8_t *ll_dest = ll_dest_buf;
295 /* Fill up the IP header, except source address */
296 iphdr->verhdrlen = ( ( IP_VER << 4 ) | ( sizeof ( *iphdr ) / 4 ) );
297 iphdr->service = IP_TOS;
298 iphdr->len = htons ( pkb_len ( pkb ) );
299 iphdr->ident = htons ( ++next_ident );
302 iphdr->protocol = tcpip_protocol->tcpip_proto;
304 iphdr->dest = sin_dest->sin_addr;
306 /* Use routing table to identify next hop and transmitting netdev */
307 next_hop = iphdr->dest;
308 miniroute = ipv4_route ( &next_hop );
310 DBG ( "No route to %s\n", inet_ntoa ( iphdr->dest ) );
314 iphdr->src = miniroute->address;
315 netdev = miniroute->netdev;
317 /* Calculate the transport layer checksum */
318 if ( tcpip_protocol->csum_offset > 0 )
319 ipv4_tx_csum ( pkb, tcpip_protocol );
321 /* Calculate header checksum, in network byte order */
322 iphdr->chksum = tcpip_chksum ( iphdr, sizeof ( *iphdr ) );
324 /* Print IP4 header for debugging */
325 DBG ( "IPv4 TX %s->", inet_ntoa ( iphdr->src ) );
326 DBG ( "%s len %d proto %d id %04x csum %04x\n",
327 inet_ntoa ( iphdr->dest ), ntohs ( iphdr->len ), iphdr->protocol,
328 ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) );
330 /* Determine link-layer destination address */
331 if ( next_hop.s_addr == INADDR_BROADCAST ) {
332 /* Broadcast address */
333 ll_dest = netdev->ll_protocol->ll_broadcast;
334 } else if ( IN_MULTICAST ( next_hop.s_addr ) ) {
335 /* Special case: IPv4 multicast over Ethernet. This
336 * code may need to be generalised once we find out
337 * what happens for other link layers.
339 uint8_t *next_hop_bytes = ( uint8_t * ) &next_hop;
340 ll_dest_buf[0] = 0x01;
341 ll_dest_buf[0] = 0x00;
342 ll_dest_buf[0] = 0x5e;
343 ll_dest_buf[3] = next_hop_bytes[1] & 0x7f;
344 ll_dest_buf[4] = next_hop_bytes[2];
345 ll_dest_buf[5] = next_hop_bytes[3];
347 /* Unicast address: resolve via ARP */
348 if ( ( rc = arp_resolve ( netdev, &ipv4_protocol, &next_hop,
349 &iphdr->src, ll_dest_buf ) ) != 0 ) {
350 DBG ( "No ARP entry for %s\n",
351 inet_ntoa ( iphdr->dest ) );
356 /* Hand off to link layer */
357 return net_tx ( pkb, netdev, &ipv4_protocol, ll_dest );
365 * Process incoming packets
367 * @v pkb Packet buffer
368 * @v netdev Network device
369 * @v ll_source Link-layer destination source
371 * This function expects an IP4 network datagram. It processes the headers
372 * and sends it to the transport layer.
374 static int ipv4_rx ( struct pk_buff *pkb, struct net_device *netdev __unused,
375 const void *ll_source __unused ) {
376 struct iphdr *iphdr = pkb->data;
378 struct sockaddr_in sin;
379 struct sockaddr_tcpip st;
384 if ( pkb_len ( pkb ) < sizeof ( *iphdr ) ) {
385 DBG ( "IP datagram too short (%d bytes)\n", pkb_len ( pkb ) );
389 /* Print IP4 header for debugging */
390 DBG ( "IPv4 RX %s<-", inet_ntoa ( iphdr->dest ) );
391 DBG ( "%s len %d proto %d id %04x csum %04x\n",
392 inet_ntoa ( iphdr->src ), ntohs ( iphdr->len ), iphdr->protocol,
393 ntohs ( iphdr->ident ), ntohs ( iphdr->chksum ) );
395 /* Validate version and header length */
396 if ( iphdr->verhdrlen != 0x45 ) {
397 DBG ( "Bad version and header length %x\n", iphdr->verhdrlen );
401 /* Validate length of IP packet */
402 if ( ntohs ( iphdr->len ) > pkb_len ( pkb ) ) {
403 DBG ( "Inconsistent packet length %d\n",
404 ntohs ( iphdr->len ) );
408 /* Verify the checksum */
409 if ( ( chksum = ipv4_rx_csum ( pkb, iphdr->protocol ) ) != 0xffff ) {
410 DBG ( "Bad checksum %x\n", chksum );
412 /* Fragment reassembly */
413 if ( ( iphdr->frags & htons ( IP_MASK_MOREFRAGS ) ) ||
414 ( ( iphdr->frags & htons ( IP_MASK_OFFSET ) ) != 0 ) ) {
415 /* Pass the fragment to the reassembler ipv4_ressable() which
416 * either returns a fully reassembled packet buffer or NULL.
418 pkb = ipv4_reassemble ( pkb );
424 /* To reduce code size, the following functions are not implemented:
425 * 1. Check the destination address
426 * 2. Check the TTL field
427 * 3. Check the service field
430 /* Construct socket addresses */
431 memset ( &src, 0, sizeof ( src ) );
432 src.sin.sin_family = AF_INET;
433 src.sin.sin_addr = iphdr->src;
434 memset ( &dest, 0, sizeof ( dest ) );
435 dest.sin.sin_family = AF_INET;
436 dest.sin.sin_addr = iphdr->dest;
439 pkb_unput ( pkb, pkb_len ( pkb ) - ntohs ( iphdr->len ) );
440 pkb_pull ( pkb, sizeof ( *iphdr ) );
442 /* Send it to the transport layer */
443 return tcpip_rx ( pkb, iphdr->protocol, &src.st, &dest.st );
451 * Check existence of IPv4 address for ARP
453 * @v netdev Network device
454 * @v net_addr Network-layer address
455 * @ret rc Return status code
457 static int ipv4_arp_check ( struct net_device *netdev, const void *net_addr ) {
458 const struct in_addr *address = net_addr;
459 struct ipv4_miniroute *miniroute;
461 list_for_each_entry ( miniroute, &miniroutes, list ) {
462 if ( ( miniroute->netdev == netdev ) &&
463 ( miniroute->address.s_addr == address->s_addr ) ) {
464 /* Found matching address */
/**
 * Convert IPv4 address to dotted-quad notation
 *
 * @v in	IP address
 * @ret string	IP address in dotted-quad notation
 *
 * The result lives in a single static buffer and is overwritten by
 * the next call, so it must be used (or copied) before calling this
 * function again.
 */
char * inet_ntoa ( struct in_addr in ) {
	static char buf[16]; /* "xxx.xxx.xxx.xxx" */
	uint8_t *bytes = ( uint8_t * ) &in;

	/* Bytes are printed in memory order */
	sprintf ( buf, "%d.%d.%d.%d", bytes[0], bytes[1], bytes[2], bytes[3] );
	return buf;
}
486 * Transcribe IP address
488 * @v net_addr IP address
489 * @ret string IP address in dotted-quad notation
492 static const char * ipv4_ntoa ( const void *net_addr ) {
493 return inet_ntoa ( * ( ( struct in_addr * ) net_addr ) );
497 struct net_protocol ipv4_protocol __net_protocol = {
499 .net_proto = htons ( ETH_P_IP ),
500 .net_addr_len = sizeof ( struct in_addr ),
505 /** IPv4 TCPIP net protocol */
506 struct tcpip_net_protocol ipv4_tcpip_protocol __tcpip_net_protocol = {
508 .sa_family = AF_INET,
512 /** IPv4 ARP protocol */
513 struct arp_net_protocol ipv4_arp_protocol __arp_net_protocol = {
514 .net_protocol = &ipv4_protocol,
515 .check = ipv4_arp_check,