/*
 * [crypto] Add our own general-purpose cipher-block chaining routines
 * [people/asdlkf/gpxe.git] / src / crypto / axtls / aes.c
 */
/*
 *  Copyright(C) 2006 Cameron Rich
 *
 *  This library is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
18
/**
 * AES implementation - this is a small code version. There are much faster
 * versions around but they are much larger in size (i.e. they use large
 * submix tables).
 */
24
25 #include <string.h>
26 #include "crypto.h"
27
/* everything below is compiled out in skeleton mode */
29 #ifndef CONFIG_SSL_SKELETON_MODE
30
/*
 * Rotate a 32-bit word right by one, two or three bytes.
 *
 * The operand is converted to uint32_t first so that the rotation wraps at
 * 32 bits whatever integer type the caller passes in.  The argument is
 * expanded twice and must therefore be free of side effects.
 */
#define rot1(x) ((uint32_t)(((uint32_t)(x) << 24) | ((uint32_t)(x) >>  8)))
#define rot2(x) ((uint32_t)(((uint32_t)(x) << 16) | ((uint32_t)(x) >> 16)))
#define rot3(x) ((uint32_t)(((uint32_t)(x) <<  8) | ((uint32_t)(x) >> 24)))
34
/*
 * This cute trick does 4 'mul by two' at once, i.e. it doubles each of the
 * four bytes packed in a 32-bit word in GF(2^8).  Stolen from
 * Dr B. R. Gladman <brg@gladman.uk.net> but I'm sure the u-(u>>7) is
 * a standard graphics trick.
 *
 * The key to this is that we need to xor a byte with 0x1b if its top bit
 * was set before doubling.  Per byte, for a byte with bit 7 set / clear:
 *
 *   a  1xxx xxxx   0xxx xxxx   input byte
 *   b  1000 0000   0000 0000   t = x & mt: first we mask bit 7,
 *   c  0000 0001   0000 0000   t >> 7: shift it right by 7 into bit 0,
 *   d  0111 1111   0000 0000   t - (t >> 7): subtract (c) from (b),
 *   e  0001 1011   0000 0000   and finally and with the 0x1b mask (mm)
 *
 * (x + x) & mh is the doubled word with the carry out of each byte thrown
 * away; xoring it with (e) gives the reduced result.
 *
 * mul2(x,t): x is the 32-bit input (expanded three times, so no side
 * effects), t is a scratch uint32_t lvalue that gets overwritten.
 */
#define mt  0x80808080
#define ml  0x7f7f7f7f
#define mh  0xfefefefe
#define mm  0x1b1b1b1b
#define mul2(x,t)       ((t)=((x)&mt), \
                        ((((x)+(x))&mh)^(((t)-((t)>>7))&mm)))
52
/*
 * Inverse MixColumn applied to one column packed in a 32-bit word.
 *
 * x is the input word; f2, f4, f8 and f9 are caller-supplied scratch
 * uint32_t lvalues which are overwritten.  The macro is a single comma
 * expression whose value is
 *
 *     14*x ^ rot3(11*x) ^ rot2(13*x) ^ rot1(9*x)
 *
 * where the products are taken byte-wise in GF(2^8) and built from
 * repeated mul2() doublings (2x, 4x, 8x; 9x = x^8x; 14x = 2x^4x^8x;
 * 11x = 2x^9x; 13x = 4x^9x).
 *
 * x is expanded more than once, so it must not have side effects.
 */
#define inv_mix_col(x,f2,f4,f8,f9) (\
                        (f2)=mul2(x,f2), \
                        (f4)=mul2(f2,f4), \
                        (f8)=mul2(f4,f8), \
                        (f9)=(x)^(f8), \
                        (f8)=((f2)^(f4)^(f8)), \
                        (f2)^=(f9), \
                        (f4)^=(f9), \
                        (f8)^=rot3(f2), \
                        (f8)^=rot2(f4), \
                        (f8)^rot1(f9))
64
/*
 * Endian independent word access through a uint32_t pointer.
 *
 * n2l(c,l): read the network-order (big-endian) word at *c into l in host
 *           order, then advance c by one word.  Wrapped in do/while(0) so
 *           that it behaves as one statement, e.g. under an unbraced if.
 * l2n(l,c): store the host-order value l at *c in network order, then
 *           advance c by one word.
 */
#define n2l(c,l) do { (l) = ntohl(*(c)); (c)++; } while (0)
#define l2n(l,c) (*(c)++ = htonl(l))
68
/*
 * AES S-box: forward byte substitution table, indexed by the input byte.
 * Used by the key expansion and by AES_encrypt().
 */
static const uint8_t aes_sbox[256] =
{
        0x63,0x7C,0x77,0x7B,0xF2,0x6B,0x6F,0xC5,
        0x30,0x01,0x67,0x2B,0xFE,0xD7,0xAB,0x76,
        0xCA,0x82,0xC9,0x7D,0xFA,0x59,0x47,0xF0,
        0xAD,0xD4,0xA2,0xAF,0x9C,0xA4,0x72,0xC0,
        0xB7,0xFD,0x93,0x26,0x36,0x3F,0xF7,0xCC,
        0x34,0xA5,0xE5,0xF1,0x71,0xD8,0x31,0x15,
        0x04,0xC7,0x23,0xC3,0x18,0x96,0x05,0x9A,
        0x07,0x12,0x80,0xE2,0xEB,0x27,0xB2,0x75,
        0x09,0x83,0x2C,0x1A,0x1B,0x6E,0x5A,0xA0,
        0x52,0x3B,0xD6,0xB3,0x29,0xE3,0x2F,0x84,
        0x53,0xD1,0x00,0xED,0x20,0xFC,0xB1,0x5B,
        0x6A,0xCB,0xBE,0x39,0x4A,0x4C,0x58,0xCF,
        0xD0,0xEF,0xAA,0xFB,0x43,0x4D,0x33,0x85,
        0x45,0xF9,0x02,0x7F,0x50,0x3C,0x9F,0xA8,
        0x51,0xA3,0x40,0x8F,0x92,0x9D,0x38,0xF5,
        0xBC,0xB6,0xDA,0x21,0x10,0xFF,0xF3,0xD2,
        0xCD,0x0C,0x13,0xEC,0x5F,0x97,0x44,0x17,
        0xC4,0xA7,0x7E,0x3D,0x64,0x5D,0x19,0x73,
        0x60,0x81,0x4F,0xDC,0x22,0x2A,0x90,0x88,
        0x46,0xEE,0xB8,0x14,0xDE,0x5E,0x0B,0xDB,
        0xE0,0x32,0x3A,0x0A,0x49,0x06,0x24,0x5C,
        0xC2,0xD3,0xAC,0x62,0x91,0x95,0xE4,0x79,
        0xE7,0xC8,0x37,0x6D,0x8D,0xD5,0x4E,0xA9,
        0x6C,0x56,0xF4,0xEA,0x65,0x7A,0xAE,0x08,
        0xBA,0x78,0x25,0x2E,0x1C,0xA6,0xB4,0xC6,
        0xE8,0xDD,0x74,0x1F,0x4B,0xBD,0x8B,0x8A,
        0x70,0x3E,0xB5,0x66,0x48,0x03,0xF6,0x0E,
        0x61,0x35,0x57,0xB9,0x86,0xC1,0x1D,0x9E,
        0xE1,0xF8,0x98,0x11,0x69,0xD9,0x8E,0x94,
        0x9B,0x1E,0x87,0xE9,0xCE,0x55,0x28,0xDF,
        0x8C,0xA1,0x89,0x0D,0xBF,0xE6,0x42,0x68,
        0x41,0x99,0x2D,0x0F,0xB0,0x54,0xBB,0x16,
};
107
/*
 * AES is-box: inverse byte substitution table, indexed by the input byte.
 * Used by AES_decrypt().
 */
static const uint8_t aes_isbox[256] = 
{
    0x52,0x09,0x6a,0xd5,0x30,0x36,0xa5,0x38,
    0xbf,0x40,0xa3,0x9e,0x81,0xf3,0xd7,0xfb,
    0x7c,0xe3,0x39,0x82,0x9b,0x2f,0xff,0x87,
    0x34,0x8e,0x43,0x44,0xc4,0xde,0xe9,0xcb,
    0x54,0x7b,0x94,0x32,0xa6,0xc2,0x23,0x3d,
    0xee,0x4c,0x95,0x0b,0x42,0xfa,0xc3,0x4e,
    0x08,0x2e,0xa1,0x66,0x28,0xd9,0x24,0xb2,
    0x76,0x5b,0xa2,0x49,0x6d,0x8b,0xd1,0x25,
    0x72,0xf8,0xf6,0x64,0x86,0x68,0x98,0x16,
    0xd4,0xa4,0x5c,0xcc,0x5d,0x65,0xb6,0x92,
    0x6c,0x70,0x48,0x50,0xfd,0xed,0xb9,0xda,
    0x5e,0x15,0x46,0x57,0xa7,0x8d,0x9d,0x84,
    0x90,0xd8,0xab,0x00,0x8c,0xbc,0xd3,0x0a,
    0xf7,0xe4,0x58,0x05,0xb8,0xb3,0x45,0x06,
    0xd0,0x2c,0x1e,0x8f,0xca,0x3f,0x0f,0x02,
    0xc1,0xaf,0xbd,0x03,0x01,0x13,0x8a,0x6b,
    0x3a,0x91,0x11,0x41,0x4f,0x67,0xdc,0xea,
    0x97,0xf2,0xcf,0xce,0xf0,0xb4,0xe6,0x73,
    0x96,0xac,0x74,0x22,0xe7,0xad,0x35,0x85,
    0xe2,0xf9,0x37,0xe8,0x1c,0x75,0xdf,0x6e,
    0x47,0xf1,0x1a,0x71,0x1d,0x29,0xc5,0x89,
    0x6f,0xb7,0x62,0x0e,0xaa,0x18,0xbe,0x1b,
    0xfc,0x56,0x3e,0x4b,0xc6,0xd2,0x79,0x20,
    0x9a,0xdb,0xc0,0xfe,0x78,0xcd,0x5a,0xf4,
    0x1f,0xdd,0xa8,0x33,0x88,0x07,0xc7,0x31,
    0xb1,0x12,0x10,0x59,0x27,0x80,0xec,0x5f,
    0x60,0x51,0x7f,0xa9,0x19,0xb5,0x4a,0x0d,
    0x2d,0xe5,0x7a,0x9f,0x93,0xc9,0x9c,0xef,
    0xa0,0xe0,0x3b,0x4d,0xae,0x2a,0xf5,0xb0,
    0xc8,0xeb,0xbb,0x3c,0x83,0x53,0x99,0x61,
    0x17,0x2b,0x04,0x7e,0xba,0x77,0xd6,0x26,
    0xe1,0x69,0x14,0x63,0x55,0x21,0x0c,0x7d
};
146
/*
 * Key-expansion round constants.  Each entry is the previous one doubled
 * in GF(2^8) (reduction constant 0x1b), starting from 0x01.
 */
static const unsigned char Rcon[30]=
{
        0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80,
        0x1b,0x36,0x6c,0xd8,0xab,0x4d,0x9a,0x2f,
        0x5e,0xbc,0x63,0xc6,0x97,0x35,0x6a,0xd4,
        0xb3,0x7d,0xfa,0xef,0xc5,0x91,
};
154
/*
 * Perform doubling in Galois Field GF(2^8) using the irreducible polynomial
 * x^8+x^4+x^3+x+1 (i.e. xor with 0x1b when bit 7 of the input is set).
 *
 * Only the low byte of x takes part in the result: anything shifted above
 * bit 7 is discarded by the conversion to the unsigned char return type.
 */
static unsigned char AES_xtime(uint32_t x)
{
        uint32_t doubled = x << 1;

        if (x & 0x80)
        {
                doubled ^= 0x1b;
        }

        return (unsigned char)doubled;
}
161
162 /**
163  * Set up AES with the key/iv and cipher size.
164  */
165 void AES_set_key(AES_CTX *ctx, const uint8_t *key, 
166         const uint8_t *iv, AES_MODE mode)
167 {
168     int i, ii;
169     uint32_t *W, tmp, tmp2;
170     const unsigned char *ip;
171     int words;
172
173     switch (mode)
174     {
175         case AES_MODE_128:
176             i = 10;
177             words = 4;
178             break;
179
180         case AES_MODE_256:
181             i = 14;
182             words = 8;
183             break;
184
185         default:        /* fail silently */
186             return;
187     }
188
189     ctx->rounds = i;
190     ctx->key_size = words;
191     W = ctx->ks;
192     for (i = 0; i < words; i+=2)
193     {
194         W[i+0]= ((uint32_t)key[ 0]<<24)|
195             ((uint32_t)key[ 1]<<16)|
196             ((uint32_t)key[ 2]<< 8)|
197             ((uint32_t)key[ 3]    );
198         W[i+1]= ((uint32_t)key[ 4]<<24)|
199             ((uint32_t)key[ 5]<<16)|
200             ((uint32_t)key[ 6]<< 8)|
201             ((uint32_t)key[ 7]    );
202         key += 8;
203     }
204
205     ip = Rcon;
206     ii = 4 * (ctx->rounds+1);
207     for (i = words; i<ii; i++)
208     {
209         tmp = W[i-1];
210
211         if ((i % words) == 0)
212         {
213             tmp2 =(uint32_t)aes_sbox[(tmp    )&0xff]<< 8;
214             tmp2|=(uint32_t)aes_sbox[(tmp>> 8)&0xff]<<16;
215             tmp2|=(uint32_t)aes_sbox[(tmp>>16)&0xff]<<24;
216             tmp2|=(uint32_t)aes_sbox[(tmp>>24)     ];
217             tmp=tmp2^(((unsigned int)*ip)<<24);
218             ip++;
219         }
220
221         if ((words == 8) && ((i % words) == 4))
222         {
223             tmp2 =(uint32_t)aes_sbox[(tmp    )&0xff]    ;
224             tmp2|=(uint32_t)aes_sbox[(tmp>> 8)&0xff]<< 8;
225             tmp2|=(uint32_t)aes_sbox[(tmp>>16)&0xff]<<16;
226             tmp2|=(uint32_t)aes_sbox[(tmp>>24)     ]<<24;
227             tmp=tmp2;
228         }
229
230         W[i]=W[i-words]^tmp;
231     }
232
233     /* copy the iv across */
234     memcpy(ctx->iv, iv, 16);
235 }
236
237 /**
238  * Change a key for decryption.
239  */
240 void AES_convert_key(AES_CTX *ctx)
241 {
242     int i;
243     uint32_t *k,w,t1,t2,t3,t4;
244
245     k = ctx->ks;
246     k += 4;
247
248     for (i=ctx->rounds*4; i>4; i--)
249     {
250         w= *k;
251         w = inv_mix_col(w,t1,t2,t3,t4);
252         *k++ =w;
253     }
254 }
255
256 #if 0
257 /**
258  * Encrypt a byte sequence (with a block size 16) using the AES cipher.
259  */
260 void AES_cbc_encrypt(AES_CTX *ctx, const uint8_t *msg, uint8_t *out, int length)
261 {
262     uint32_t tin0, tin1, tin2, tin3;
263     uint32_t tout0, tout1, tout2, tout3;
264     uint32_t tin[4];
265     uint32_t *iv = (uint32_t *)ctx->iv;
266     uint32_t *msg_32 = (uint32_t *)msg;
267     uint32_t *out_32 = (uint32_t *)out;
268
269     n2l(iv, tout0);
270     n2l(iv, tout1);
271     n2l(iv, tout2);
272     n2l(iv, tout3);
273     iv -= 4;
274
275     for (length -= 16; length >= 0; length -= 16)
276     {
277         n2l(msg_32, tin0);
278         n2l(msg_32, tin1);
279         n2l(msg_32, tin2);
280         n2l(msg_32, tin3);
281         tin[0] = tin0^tout0;
282         tin[1] = tin1^tout1;
283         tin[2] = tin2^tout2;
284         tin[3] = tin3^tout3;
285
286         AES_encrypt(ctx, tin);
287
288         tout0 = tin[0]; 
289         l2n(tout0, out_32);
290         tout1 = tin[1]; 
291         l2n(tout1, out_32);
292         tout2 = tin[2]; 
293         l2n(tout2, out_32);
294         tout3 = tin[3]; 
295         l2n(tout3, out_32);
296     }
297
298     l2n(tout0, iv);
299     l2n(tout1, iv);
300     l2n(tout2, iv);
301     l2n(tout3, iv);
302 }
303
304 /**
305  * Decrypt a byte sequence (with a block size 16) using the AES cipher.
306  */
307 void AES_cbc_decrypt(AES_CTX *ctx, const uint8_t *msg, uint8_t *out, int length)
308 {
309     uint32_t tin0, tin1, tin2, tin3;
310     uint32_t xor0,xor1,xor2,xor3;
311     uint32_t tout0,tout1,tout2,tout3;
312     uint32_t data[4];
313     uint32_t *iv = (uint32_t *)ctx->iv;
314     uint32_t *msg_32 = (uint32_t *)msg;
315     uint32_t *out_32 = (uint32_t *)out;
316
317     n2l(iv ,xor0);
318     n2l(iv, xor1);
319     n2l(iv, xor2);
320     n2l(iv, xor3);
321     iv -= 4;
322
323     for (length-=16; length >= 0; length -= 16)
324     {
325         n2l(msg_32, tin0);
326         n2l(msg_32, tin1);
327         n2l(msg_32, tin2);
328         n2l(msg_32, tin3);
329
330         data[0] = tin0;
331         data[1] = tin1;
332         data[2] = tin2;
333         data[3] = tin3;
334
335         AES_decrypt(ctx, data);
336
337         tout0 = data[0]^xor0;
338         tout1 = data[1]^xor1;
339         tout2 = data[2]^xor2;
340         tout3 = data[3]^xor3;
341
342         xor0 = tin0;
343         xor1 = tin1;
344         xor2 = tin2;
345         xor3 = tin3;
346
347         l2n(tout0, out_32);
348         l2n(tout1, out_32);
349         l2n(tout2, out_32);
350         l2n(tout3, out_32);
351     }
352
353     l2n(xor0, iv);
354     l2n(xor1, iv);
355     l2n(xor2, iv);
356     l2n(xor3, iv);
357 }
358 #endif
359
360 /**
361  * Encrypt a single block (16 bytes) of data
362  */
363 void AES_encrypt(const AES_CTX *ctx, uint32_t *data)
364 {
365     /* To make this code smaller, generate the sbox entries on the fly.
366      * This will have a really heavy effect upon performance.
367      */
368     uint32_t tmp[4];
369     uint32_t tmp1, old_a0, a0, a1, a2, a3, row;
370     int curr_rnd;
371     int rounds = ctx->rounds; 
372     const uint32_t *k = ctx->ks;
373
374     /* Pre-round key addition */
375     for (row = 0; row < 4; row++)
376     {
377         data[row] ^= *(k++);
378     }
379
380     /* Encrypt one block. */
381     for (curr_rnd = 0; curr_rnd < rounds; curr_rnd++)
382     {
383         /* Perform ByteSub and ShiftRow operations together */
384         for (row = 0; row < 4; row++)
385         {
386             a0 = (uint32_t)aes_sbox[(data[row%4]>>24)&0xFF];
387             a1 = (uint32_t)aes_sbox[(data[(row+1)%4]>>16)&0xFF];
388             a2 = (uint32_t)aes_sbox[(data[(row+2)%4]>>8)&0xFF]; 
389             a3 = (uint32_t)aes_sbox[(data[(row+3)%4])&0xFF];
390
391             /* Perform MixColumn iff not last round */
392             if (curr_rnd < (rounds - 1))
393             {
394                 tmp1 = a0 ^ a1 ^ a2 ^ a3;
395                 old_a0 = a0;
396
397                 a0 ^= tmp1 ^ AES_xtime(a0 ^ a1);
398                 a1 ^= tmp1 ^ AES_xtime(a1 ^ a2);
399                 a2 ^= tmp1 ^ AES_xtime(a2 ^ a3);
400                 a3 ^= tmp1 ^ AES_xtime(a3 ^ old_a0);
401
402             }
403
404             tmp[row] = ((a0 << 24) | (a1 << 16) | (a2 << 8) | a3);
405         }
406
407         /* KeyAddition - note that it is vital that this loop is separate from
408            the MixColumn operation, which must be atomic...*/ 
409         for (row = 0; row < 4; row++)
410         {
411             data[row] = tmp[row] ^ *(k++);
412         }
413     }
414 }
415
416 /**
417  * Decrypt a single block (16 bytes) of data
418  */
419 void AES_decrypt(const AES_CTX *ctx, uint32_t *data)
420
421     uint32_t tmp[4];
422     uint32_t xt0,xt1,xt2,xt3,xt4,xt5,xt6;
423     uint32_t a0, a1, a2, a3, row;
424     int curr_rnd;
425     int rounds = ctx->rounds;
426     uint32_t *k = (uint32_t*)ctx->ks + ((rounds+1)*4);
427
428     /* pre-round key addition */
429     for (row=4; row > 0;row--)
430     {
431         data[row-1] ^= *(--k);
432     }
433
434     /* Decrypt one block */
435     for (curr_rnd=0; curr_rnd < rounds; curr_rnd++)
436     {
437         /* Perform ByteSub and ShiftRow operations together */
438         for (row = 4; row > 0; row--)
439         {
440             a0 = aes_isbox[(data[(row+3)%4]>>24)&0xFF];
441             a1 = aes_isbox[(data[(row+2)%4]>>16)&0xFF];
442             a2 = aes_isbox[(data[(row+1)%4]>>8)&0xFF];
443             a3 = aes_isbox[(data[row%4])&0xFF];
444
445             /* Perform MixColumn iff not last round */
446             if (curr_rnd<(rounds-1))
447             {
448                 /* The MDS cofefficients (0x09, 0x0B, 0x0D, 0x0E)
449                    are quite large compared to encryption; this 
450                    operation slows decryption down noticeably. */
451                 xt0 = AES_xtime(a0^a1);
452                 xt1 = AES_xtime(a1^a2);
453                 xt2 = AES_xtime(a2^a3);
454                 xt3 = AES_xtime(a3^a0);
455                 xt4 = AES_xtime(xt0^xt1);
456                 xt5 = AES_xtime(xt1^xt2);
457                 xt6 = AES_xtime(xt4^xt5);
458
459                 xt0 ^= a1^a2^a3^xt4^xt6;
460                 xt1 ^= a0^a2^a3^xt5^xt6;
461                 xt2 ^= a0^a1^a3^xt4^xt6;
462                 xt3 ^= a0^a1^a2^xt5^xt6;
463                 tmp[row-1] = ((xt0<<24)|(xt1<<16)|(xt2<<8)|xt3);
464             }
465             else
466                 tmp[row-1] = ((a0<<24)|(a1<<16)|(a2<<8)|a3);
467         }
468
469         for (row = 4; row > 0; row--)
470         {
471             data[row-1] = tmp[row-1] ^ *(--k);
472         }
473     }
474 }
475
476 #endif