2 * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or any later version.
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 * The x86 CPU (386 upwards) has a divl instruction which will perform
24 * unsigned division of a 64-bit dividend by a 32-bit divisor. If the
25 * resulting quotient does not fit in 32 bits, then a CPU exception
28 * Unsigned integer division is expressed as solving
30 * x = d.q + r 0 <= q, 0 <= r < d
32 * given the dividend (x) and divisor (d), to find the quotient (q)
35 * The x86 divl instruction will solve
37 * x = d.q + r 0 <= q, 0 <= r < d
39 * given x in the range 0 <= x < 2^64 and 1 <= d < 2^32, and causing a
40 * hardware exception if the resulting q >= 2^32.
42 * We can therefore use divl only if we can prove that the conditions
51 * Case 1 : 1 <= d < 2^32
52 * ======================
56 * x = xh.2^32 + xl 0 <= xh < 2^32, 0 <= xl < 2^32 (1)
58 * i.e. split x into low and high dwords. We then solve
60 * xh = d.qh + r' 0 <= qh, 0 <= r' < d (2)
62 * which we can do using a divl instruction since
64 * 0 <= xh < 2^64 since 0 <= xh < 2^32 from (1) (3)
68 * 1 <= d < 2^32 by definition of this Case (4)
72 * d.qh = xh - r' from (2)
73 * d.qh <= xh since r' >= 0 from (2)
74 * qh <= xh since d >= 1 from (2)
75 * qh < 2^32 since xh < 2^32 from (1) (5)
77 * Having obtained qh and r', we then solve
79 * ( r'.2^32 + xl ) = d.ql + r 0 <= ql, 0 <= r < d (6)
81 * which we can do using another divl instruction since
83 * xl <= 2^32 - 1 from (1), so
84 * r'.2^32 + xl <= ( r' + 1 ).2^32 - 1
85 * r'.2^32 + xl <= d.2^32 - 1 since r' < d from (2)
86 * r'.2^32 + xl < d.2^32 (7)
87 * r'.2^32 + xl < 2^64 since d < 2^32 from (4) (8)
91 * 1 <= d < 2^32 by definition of this Case (9)
95 * d.ql = ( r'.2^32 + xl ) - r from (6)
96 * d.ql <= r'.2^32 + xl since r >= 0 from (6)
97 * d.ql < d.2^32 from (7)
98 * ql < 2^32 since d >= 1 from (2) (10)
102 * x = xh.2^32 + xl from (1)
103 * x = ( d.qh + r' ).2^32 + xl from (2)
104 * x = d.qh.2^32 + ( r'.2^32 + xl )
105 * x = d.qh.2^32 + d.ql + r from (3)
106 * x = d.( qh.2^32 + ql ) + r (11)
110 * q = qh.2^32 + ql (12)
114 * x = d.q + r from (11) and (12)
116 * which is the solution.
119 * This therefore gives us a two-step algorithm:
121 * xh = d.qh + r' 0 <= qh, 0 <= r' < d (2)
122 * ( r'.2^32 + xl ) = d.ql + r 0 <= ql, 0 <= r < d (6)
124 * which translates to
140 * (which is a fast dword comparison) then the first divl instruction
141 * can be omitted, since the answer will be
147 * Case 2 : 2^32 <= d < 2^64
148 * =========================
150 * We first express d as
152 * d = dh.2^k + dl 2^31 <= dh < 2^32,
153 * 0 <= dl < 2^k, 1 <= k <= 32 (1)
155 * i.e. find the highest bit set in d, subtract 32, and split d into
156 * dh and dl at that point.
158 * We then express x as
160 * x = xh.2^k + xl 0 <= xl < 2^k (2)
164 * xh.2^k = x - xl from (2)
165 * xh.2^k <= x since xl >= 0 from (1)
166 * xh.2^k < 2^64 since xh < 2^64 from (1)
169 * We then solve the division
171 * xh = dh.q' + r' 0 <= r' < dh (4)
173 * which we can do using a divl instruction since
175 * 0 <= xh < 2^64 since x < 2^64 and xh < x
179 * 1 <= dh < 2^32 from (1)
183 * dh.q' = xh - r' from (4)
184 * dh.q' <= xh since r' >= 0 from (4)
185 * dh.q' < 2^(64-k) from (3) (5)
186 * q'.2^31 <= dh.q' since dh >= 2^31 from (1) (6)
187 * q'.2^31 < 2^(64-k) from (5) and (6)
189 * q' < 2^32 since k >= 1 from (1) (7)
193 * xh.2^k = dh.q'.2^k + r'.2^k from (4)
194 * x - xl = ( d - dl ).q' + r'.2^k from (1) and (2)
195 * x = d.q' + ( r'.2^k + xl ) - dl.q' (8)
199 * r'.2^k + xl < r'.2^k + 2^k since xl < 2^k from (2)
200 * r'.2^k + xl < ( r' + 1 ).2^k
201 * r'.2^k + xl < dh.2^k since r' < dh from (4)
202 * r'.2^k + xl < ( d - dl ) from (1) (9)
208 * This gives us two cases to consider:
212 * dl.q' <= ( r'.2^k + xl ) (15a)
216 * x = d.q' + ( r'.2^k + xl - dl.q' )
218 * is a direct solution to the division, since
220 * r'.2^k + xl < d from (9)
221 * ( r'.2^k + xl - dl.q' ) < d since dl >= 0 and q' >= 0
225 * 0 <= ( r'.2^k + xl - dl.q' ) from (15a)
229 * dl.q' > ( r'.2^k + xl ) (15b)
233 * x = d.(q'-1) + ( r'.2^k + xl ) + ( d - dl.q' )
239 * special case: k = 32 cannot be handled with shifts
248 typedef uint64_t UDItype;
255 static inline void udivmod64_lo ( const struct uint64 *x,
256 const struct uint64 *d,
265 if ( x->h >= d->l ) {
267 : "=&a" ( q->h ), "=&d" ( r_dash )
268 : "g" ( d->l ), "0" ( x->h ), "1" ( 0 ) );
272 : "=&a" ( q->l ), "=&d" ( r->l )
273 : "g" ( d->l ), "0" ( x->l ), "1" ( r_dash ) );
276 static void udivmod64 ( const struct uint64 *x,
277 const struct uint64 *d,
282 udivmod64_lo ( x, d, q, r );
290 * 64-bit division with remainder
297 UDItype __udivmoddi4 ( UDItype x, UDItype d, UDItype *r ) {
304 udivmod64 ( ( struct uint64 * ) _x, ( struct uint64 * ) _d,
305 ( struct uint64 * ) _q, ( struct uint64 * ) _r );
307 assert ( ( x == ( ( d * q ) + (*r) ) ) );
320 UDItype __udivdi3 ( UDItype x, UDItype d ) {
322 return __udivmoddi4 ( x, d, &r );
332 UDItype __umoddi3 ( UDItype x, UDItype d ) {
334 __udivmoddi4 ( x, d, &r );