direct-io.hg

changeset 13371:a8f62eb194e3

[XEN] Clean up long division code, fix for C99-mandated
truncation-towards-zero.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@localhost.localdomain
date Sat Jan 13 20:55:39 2007 +0000 (2007-01-13)
parents ba239a4a7c3f
children e079f1ff6744
files xen/common/lib.c
line diff
     1.1 --- a/xen/common/lib.c	Fri Jan 12 17:39:26 2007 +0000
     1.2 +++ b/xen/common/lib.c	Sat Jan 13 20:55:39 2007 +0000
     1.3 @@ -1,41 +1,44 @@
     1.4  
     1.5  #include <xen/ctype.h>
     1.6  #include <xen/lib.h>
     1.7 -
     1.8 +#include <xen/types.h>
     1.9  
    1.10 -/* for inc/ctype.h */
    1.11 +/* for ctype.h */
    1.12  unsigned char _ctype[] = {
    1.13 -_C,_C,_C,_C,_C,_C,_C,_C,                        /* 0-7 */
    1.14 -_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C,         /* 8-15 */
    1.15 -_C,_C,_C,_C,_C,_C,_C,_C,                        /* 16-23 */
    1.16 -_C,_C,_C,_C,_C,_C,_C,_C,                        /* 24-31 */
    1.17 -_S|_SP,_P,_P,_P,_P,_P,_P,_P,                    /* 32-39 */
    1.18 -_P,_P,_P,_P,_P,_P,_P,_P,                        /* 40-47 */
    1.19 -_D,_D,_D,_D,_D,_D,_D,_D,                        /* 48-55 */
    1.20 -_D,_D,_P,_P,_P,_P,_P,_P,                        /* 56-63 */
    1.21 -_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U,      /* 64-71 */
    1.22 -_U,_U,_U,_U,_U,_U,_U,_U,                        /* 72-79 */
    1.23 -_U,_U,_U,_U,_U,_U,_U,_U,                        /* 80-87 */
    1.24 -_U,_U,_U,_P,_P,_P,_P,_P,                        /* 88-95 */
    1.25 -_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L,      /* 96-103 */
    1.26 -_L,_L,_L,_L,_L,_L,_L,_L,                        /* 104-111 */
    1.27 -_L,_L,_L,_L,_L,_L,_L,_L,                        /* 112-119 */
    1.28 -_L,_L,_L,_P,_P,_P,_P,_C,                        /* 120-127 */
    1.29 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,                /* 128-143 */
    1.30 -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,                /* 144-159 */
    1.31 -_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,   /* 160-175 */
    1.32 -_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,       /* 176-191 */
    1.33 -_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,       /* 192-207 */
    1.34 -_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L,       /* 208-223 */
    1.35 -_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,       /* 224-239 */
    1.36 -_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L};      /* 240-255 */
    1.37 +    _C,_C,_C,_C,_C,_C,_C,_C,                        /* 0-7 */
    1.38 +    _C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C,         /* 8-15 */
    1.39 +    _C,_C,_C,_C,_C,_C,_C,_C,                        /* 16-23 */
    1.40 +    _C,_C,_C,_C,_C,_C,_C,_C,                        /* 24-31 */
    1.41 +    _S|_SP,_P,_P,_P,_P,_P,_P,_P,                    /* 32-39 */
    1.42 +    _P,_P,_P,_P,_P,_P,_P,_P,                        /* 40-47 */
    1.43 +    _D,_D,_D,_D,_D,_D,_D,_D,                        /* 48-55 */
    1.44 +    _D,_D,_P,_P,_P,_P,_P,_P,                        /* 56-63 */
    1.45 +    _P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U,      /* 64-71 */
    1.46 +    _U,_U,_U,_U,_U,_U,_U,_U,                        /* 72-79 */
    1.47 +    _U,_U,_U,_U,_U,_U,_U,_U,                        /* 80-87 */
    1.48 +    _U,_U,_U,_P,_P,_P,_P,_P,                        /* 88-95 */
    1.49 +    _P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L,      /* 96-103 */
    1.50 +    _L,_L,_L,_L,_L,_L,_L,_L,                        /* 104-111 */
    1.51 +    _L,_L,_L,_L,_L,_L,_L,_L,                        /* 112-119 */
    1.52 +    _L,_L,_L,_P,_P,_P,_P,_C,                        /* 120-127 */
    1.53 +    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,                /* 128-143 */
    1.54 +    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,                /* 144-159 */
    1.55 +    _S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,   /* 160-175 */
    1.56 +    _P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,       /* 176-191 */
    1.57 +    _U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,       /* 192-207 */
    1.58 +    _U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L,       /* 208-223 */
    1.59 +    _L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,       /* 224-239 */
    1.60 +    _L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L};      /* 240-255 */
    1.61  
    1.62 -
    1.63 -/* a couple of 64 bit operations ported from freebsd */
    1.64 -
    1.65 -/*-
    1.66 +/*
    1.67 + * A couple of 64 bit operations ported from FreeBSD.
    1.68 + * The code within the '#if BITS_PER_LONG == 32' block below, and no other
    1.69 + * code in this file, is distributed under the following licensing terms
    1.70 + * This is the modified '3-clause' BSD license with the obnoxious
    1.71 + * advertising clause removed, as permitted by University of California.
    1.72 + *
    1.73   * Copyright (c) 1992, 1993
    1.74 - *	The Regents of the University of California.  All rights reserved.
    1.75 + * The Regents of the University of California.  All rights reserved.
    1.76   *
    1.77   * This software was developed by the Computer Systems Engineering group
    1.78   * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
    1.79 @@ -49,11 +52,7 @@ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,        
    1.80   * 2. Redistributions in binary form must reproduce the above copyright
    1.81   *    notice, this list of conditions and the following disclaimer in the
    1.82   *    documentation and/or other materials provided with the distribution.
    1.83 - * 3. All advertising materials mentioning features or use of this software
    1.84 - *    must display the following acknowledgement:
    1.85 - *	This product includes software developed by the University of
    1.86 - *	California, Berkeley and its contributors.
    1.87 - * 4. Neither the name of the University nor the names of its contributors
    1.88 + * 3. Neither the name of the University nor the names of its contributors
    1.89   *    may be used to endorse or promote products derived from this software
    1.90   *    without specific prior written permission.
    1.91   *
    1.92 @@ -68,12 +67,7 @@ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,        
    1.93   * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
    1.94   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
    1.95   * SUCH DAMAGE.
    1.96 - *
    1.97 - * $FreeBSD: src/sys/libkern/divdi3.c,v 1.6 1999/08/28 00:46:31 peter Exp $
    1.98   */
    1.99 -
   1.100 -#include <asm/types.h>
   1.101 -
   1.102  #if BITS_PER_LONG == 32
   1.103  
   1.104  /*
   1.105 @@ -81,10 +75,10 @@ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,        
   1.106   * one or more of the following formats.
   1.107   */
   1.108  union uu {
   1.109 -        s64            q;              /* as a (signed) quad */
   1.110 -        s64            uq;             /* as an unsigned quad */
   1.111 -        long           sl[2];          /* as two signed longs */
   1.112 -        unsigned long  ul[2];          /* as two unsigned longs */
   1.113 +    s64            q;              /* as a (signed) quad */
   1.114 +    s64            uq;             /* as an unsigned quad */
   1.115 +    long           sl[2];          /* as two signed longs */
   1.116 +    unsigned long  ul[2];          /* as two unsigned longs */
   1.117  };
   1.118  /* XXX RN: Yuck hardcoded endianess :) */
   1.119  #define _QUAD_HIGHWORD 1
   1.120 @@ -122,31 +116,26 @@ union uu {
   1.121   * Multiprecision divide.  This algorithm is from Knuth vol. 2 (2nd ed),
   1.122   * section 4.3.1, pp. 257--259.
   1.123   */
   1.124 -#define	B	(1 << HALF_BITS)	/* digit base */
   1.125 +#define B (1 << HALF_BITS) /* digit base */
   1.126  
   1.127  /* Combine two `digits' to make a single two-digit number. */
   1.128 -#define	COMBINE(a, b) (((u_long)(a) << HALF_BITS) | (b))
   1.129 +#define COMBINE(a, b) (((u_long)(a) << HALF_BITS) | (b))
   1.130  
   1.131 -/* select a type for digits in base B: use unsigned short if they fit */
   1.132 -#if ULONG_MAX == 0xffffffff && USHRT_MAX >= 0xffff
   1.133 -typedef unsigned short digit;
   1.134 -#else
   1.135 +/* select a type for digits in base B */
   1.136  typedef u_long digit;
   1.137 -#endif
   1.138  
   1.139  /*
   1.140   * Shift p[0]..p[len] left `sh' bits, ignoring any bits that
   1.141   * `fall out' the left (there never will be any such anyway).
   1.142   * We may assume len >= 0.  NOTE THAT THIS WRITES len+1 DIGITS.
   1.143   */
   1.144 -static void
   1.145 -shl(register digit *p, register int len, register int sh)
   1.146 +static void shl(register digit *p, register int len, register int sh)
   1.147  {
   1.148 -	register int i;
   1.149 +    register int i;
   1.150  
   1.151 -	for (i = 0; i < len; i++)
   1.152 -		p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh));
   1.153 -	p[i] = LHALF(p[i] << sh);
   1.154 +    for (i = 0; i < len; i++)
   1.155 +        p[i] = LHALF(p[i] << sh) | (p[i + 1] >> (HALF_BITS - sh));
   1.156 +    p[i] = LHALF(p[i] << sh);
   1.157  }
   1.158  
   1.159  /*
   1.160 @@ -157,234 +146,222 @@ shl(register digit *p, register int len,
   1.161   * divisor are 4 `digits' in this base (they are shorter if they have
   1.162   * leading zeros).
   1.163   */
   1.164 -u64
   1.165 -__qdivrem(u64 uq, u64 vq, u64 *arq)
   1.166 +u64 __qdivrem(u64 uq, u64 vq, u64 *arq)
   1.167  {
   1.168 -	union uu tmp;
   1.169 -	digit *u, *v, *q;
   1.170 -	register digit v1, v2;
   1.171 -	u_long qhat, rhat, t;
   1.172 -	int m, n, d, j, i;
   1.173 -	digit uspace[5], vspace[5], qspace[5];
   1.174 +    union uu tmp;
   1.175 +    digit *u, *v, *q;
   1.176 +    register digit v1, v2;
   1.177 +    u_long qhat, rhat, t;
   1.178 +    int m, n, d, j, i;
   1.179 +    digit uspace[5], vspace[5], qspace[5];
   1.180  
   1.181 -	/*
   1.182 -	 * Take care of special cases: divide by zero, and u < v.
   1.183 -	 */
   1.184 -	if (vq == 0) {
   1.185 -		/* divide by zero. */
   1.186 -		static volatile const unsigned int zero = 0;
   1.187 +    /*
   1.188 +     * Take care of special cases: divide by zero, and u < v.
   1.189 +     */
   1.190 +    if (vq == 0) {
   1.191 +        /* divide by zero. */
   1.192 +        static volatile const unsigned int zero = 0;
   1.193  
   1.194 -		tmp.ul[H] = tmp.ul[L] = 1 / zero;
   1.195 -		if (arq)
   1.196 -			*arq = uq;
   1.197 -		return (tmp.q);
   1.198 -	}
   1.199 -	if (uq < vq) {
   1.200 -		if (arq)
   1.201 -			*arq = uq;
   1.202 -		return (0);
   1.203 -	}
   1.204 -	u = &uspace[0];
   1.205 -	v = &vspace[0];
   1.206 -	q = &qspace[0];
   1.207 +        tmp.ul[H] = tmp.ul[L] = 1 / zero;
   1.208 +        if (arq)
   1.209 +            *arq = uq;
   1.210 +        return (tmp.q);
   1.211 +    }
   1.212 +    if (uq < vq) {
   1.213 +        if (arq)
   1.214 +            *arq = uq;
   1.215 +        return (0);
   1.216 +    }
   1.217 +    u = &uspace[0];
   1.218 +    v = &vspace[0];
   1.219 +    q = &qspace[0];
   1.220  
   1.221 -	/*
   1.222 -	 * Break dividend and divisor into digits in base B, then
   1.223 -	 * count leading zeros to determine m and n.  When done, we
   1.224 -	 * will have:
   1.225 -	 *	u = (u[1]u[2]...u[m+n]) sub B
   1.226 -	 *	v = (v[1]v[2]...v[n]) sub B
   1.227 -	 *	v[1] != 0
   1.228 -	 *	1 < n <= 4 (if n = 1, we use a different division algorithm)
   1.229 -	 *	m >= 0 (otherwise u < v, which we already checked)
   1.230 -	 *	m + n = 4
   1.231 -	 * and thus
   1.232 -	 *	m = 4 - n <= 2
   1.233 -	 */
   1.234 -	tmp.uq = uq;
   1.235 -	u[0] = 0;
   1.236 -	u[1] = HHALF(tmp.ul[H]);
   1.237 -	u[2] = LHALF(tmp.ul[H]);
   1.238 -	u[3] = HHALF(tmp.ul[L]);
   1.239 -	u[4] = LHALF(tmp.ul[L]);
   1.240 -	tmp.uq = vq;
   1.241 -	v[1] = HHALF(tmp.ul[H]);
   1.242 -	v[2] = LHALF(tmp.ul[H]);
   1.243 -	v[3] = HHALF(tmp.ul[L]);
   1.244 -	v[4] = LHALF(tmp.ul[L]);
   1.245 -	for (n = 4; v[1] == 0; v++) {
   1.246 -		if (--n == 1) {
   1.247 -			u_long rbj;	/* r*B+u[j] (not root boy jim) */
   1.248 -			digit q1, q2, q3, q4;
   1.249 +    /*
   1.250 +     * Break dividend and divisor into digits in base B, then
   1.251 +     * count leading zeros to determine m and n.  When done, we
   1.252 +     * will have:
   1.253 +     * u = (u[1]u[2]...u[m+n]) sub B
   1.254 +     * v = (v[1]v[2]...v[n]) sub B
   1.255 +     * v[1] != 0
   1.256 +     * 1 < n <= 4 (if n = 1, we use a different division algorithm)
   1.257 +     * m >= 0 (otherwise u < v, which we already checked)
   1.258 +     * m + n = 4
   1.259 +     * and thus
   1.260 +     * m = 4 - n <= 2
   1.261 +     */
   1.262 +    tmp.uq = uq;
   1.263 +    u[0] = 0;
   1.264 +    u[1] = HHALF(tmp.ul[H]);
   1.265 +    u[2] = LHALF(tmp.ul[H]);
   1.266 +    u[3] = HHALF(tmp.ul[L]);
   1.267 +    u[4] = LHALF(tmp.ul[L]);
   1.268 +    tmp.uq = vq;
   1.269 +    v[1] = HHALF(tmp.ul[H]);
   1.270 +    v[2] = LHALF(tmp.ul[H]);
   1.271 +    v[3] = HHALF(tmp.ul[L]);
   1.272 +    v[4] = LHALF(tmp.ul[L]);
   1.273 +    for (n = 4; v[1] == 0; v++) {
   1.274 +        if (--n == 1) {
   1.275 +            u_long rbj; /* r*B+u[j] (not root boy jim) */
   1.276 +            digit q1, q2, q3, q4;
   1.277  
   1.278 -			/*
   1.279 -			 * Change of plan, per exercise 16.
   1.280 -			 *	r = 0;
   1.281 -			 *	for j = 1..4:
   1.282 -			 *		q[j] = floor((r*B + u[j]) / v),
   1.283 -			 *		r = (r*B + u[j]) % v;
   1.284 -			 * We unroll this completely here.
   1.285 -			 */
   1.286 -			t = v[2];	/* nonzero, by definition */
   1.287 -			q1 = u[1] / t;
   1.288 -			rbj = COMBINE(u[1] % t, u[2]);
   1.289 -			q2 = rbj / t;
   1.290 -			rbj = COMBINE(rbj % t, u[3]);
   1.291 -			q3 = rbj / t;
   1.292 -			rbj = COMBINE(rbj % t, u[4]);
   1.293 -			q4 = rbj / t;
   1.294 -			if (arq)
   1.295 -				*arq = rbj % t;
   1.296 -			tmp.ul[H] = COMBINE(q1, q2);
   1.297 -			tmp.ul[L] = COMBINE(q3, q4);
   1.298 -			return (tmp.q);
   1.299 -		}
   1.300 -	}
   1.301 +            /*
   1.302 +             * Change of plan, per exercise 16.
   1.303 +             * r = 0;
   1.304 +             * for j = 1..4:
   1.305 +             *  q[j] = floor((r*B + u[j]) / v),
   1.306 +             *  r = (r*B + u[j]) % v;
   1.307 +             * We unroll this completely here.
   1.308 +             */
   1.309 +            t = v[2]; /* nonzero, by definition */
   1.310 +            q1 = u[1] / t;
   1.311 +            rbj = COMBINE(u[1] % t, u[2]);
   1.312 +            q2 = rbj / t;
   1.313 +            rbj = COMBINE(rbj % t, u[3]);
   1.314 +            q3 = rbj / t;
   1.315 +            rbj = COMBINE(rbj % t, u[4]);
   1.316 +            q4 = rbj / t;
   1.317 +            if (arq)
   1.318 +                *arq = rbj % t;
   1.319 +            tmp.ul[H] = COMBINE(q1, q2);
   1.320 +            tmp.ul[L] = COMBINE(q3, q4);
   1.321 +            return (tmp.q);
   1.322 +        }
   1.323 +    }
   1.324  
   1.325 -	/*
   1.326 -	 * By adjusting q once we determine m, we can guarantee that
   1.327 -	 * there is a complete four-digit quotient at &qspace[1] when
   1.328 -	 * we finally stop.
   1.329 -	 */
   1.330 -	for (m = 4 - n; u[1] == 0; u++)
   1.331 -		m--;
   1.332 -	for (i = 4 - m; --i >= 0;)
   1.333 -		q[i] = 0;
   1.334 -	q += 4 - m;
   1.335 +    /*
   1.336 +     * By adjusting q once we determine m, we can guarantee that
   1.337 +     * there is a complete four-digit quotient at &qspace[1] when
   1.338 +     * we finally stop.
   1.339 +     */
   1.340 +    for (m = 4 - n; u[1] == 0; u++)
   1.341 +        m--;
   1.342 +    for (i = 4 - m; --i >= 0;)
   1.343 +        q[i] = 0;
   1.344 +    q += 4 - m;
   1.345  
   1.346 -	/*
   1.347 -	 * Here we run Program D, translated from MIX to C and acquiring
   1.348 -	 * a few minor changes.
   1.349 -	 *
   1.350 -	 * D1: choose multiplier 1 << d to ensure v[1] >= B/2.
   1.351 -	 */
   1.352 -	d = 0;
   1.353 -	for (t = v[1]; t < B / 2; t <<= 1)
   1.354 -		d++;
   1.355 -	if (d > 0) {
   1.356 -		shl(&u[0], m + n, d);		/* u <<= d */
   1.357 -		shl(&v[1], n - 1, d);		/* v <<= d */
   1.358 -	}
   1.359 -	/*
   1.360 -	 * D2: j = 0.
   1.361 -	 */
   1.362 -	j = 0;
   1.363 -	v1 = v[1];	/* for D3 -- note that v[1..n] are constant */
   1.364 -	v2 = v[2];	/* for D3 */
   1.365 -	do {
   1.366 -		register digit uj0, uj1, uj2;
   1.367 +    /*
   1.368 +     * Here we run Program D, translated from MIX to C and acquiring
   1.369 +     * a few minor changes.
   1.370 +     *
   1.371 +     * D1: choose multiplier 1 << d to ensure v[1] >= B/2.
   1.372 +     */
   1.373 +    d = 0;
   1.374 +    for (t = v[1]; t < B / 2; t <<= 1)
   1.375 +        d++;
   1.376 +    if (d > 0) {
   1.377 +        shl(&u[0], m + n, d);  /* u <<= d */
   1.378 +        shl(&v[1], n - 1, d);  /* v <<= d */
   1.379 +    }
   1.380 +    /*
   1.381 +     * D2: j = 0.
   1.382 +     */
   1.383 +    j = 0;
   1.384 +    v1 = v[1]; /* for D3 -- note that v[1..n] are constant */
   1.385 +    v2 = v[2]; /* for D3 */
   1.386 +    do {
   1.387 +        register digit uj0, uj1, uj2;
   1.388  
   1.389 -		/*
   1.390 -		 * D3: Calculate qhat (\^q, in TeX notation).
   1.391 -		 * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and
   1.392 -		 * let rhat = (u[j]*B + u[j+1]) mod v[1].
   1.393 -		 * While rhat < B and v[2]*qhat > rhat*B+u[j+2],
   1.394 -		 * decrement qhat and increase rhat correspondingly.
   1.395 -		 * Note that if rhat >= B, v[2]*qhat < rhat*B.
   1.396 -		 */
   1.397 -		uj0 = u[j + 0];	/* for D3 only -- note that u[j+...] change */
   1.398 -		uj1 = u[j + 1];	/* for D3 only */
   1.399 -		uj2 = u[j + 2];	/* for D3 only */
   1.400 -		if (uj0 == v1) {
   1.401 -			qhat = B;
   1.402 -			rhat = uj1;
   1.403 -			goto qhat_too_big;
   1.404 -		} else {
   1.405 -			u_long nn = COMBINE(uj0, uj1);
   1.406 -			qhat = nn / v1;
   1.407 -			rhat = nn % v1;
   1.408 -		}
   1.409 -		while (v2 * qhat > COMBINE(rhat, uj2)) {
   1.410 -	qhat_too_big:
   1.411 -			qhat--;
   1.412 -			if ((rhat += v1) >= B)
   1.413 -				break;
   1.414 -		}
   1.415 -		/*
   1.416 -		 * D4: Multiply and subtract.
   1.417 -		 * The variable `t' holds any borrows across the loop.
   1.418 -		 * We split this up so that we do not require v[0] = 0,
   1.419 -		 * and to eliminate a final special case.
   1.420 -		 */
   1.421 -		for (t = 0, i = n; i > 0; i--) {
   1.422 -			t = u[i + j] - v[i] * qhat - t;
   1.423 -			u[i + j] = LHALF(t);
   1.424 -			t = (B - HHALF(t)) & (B - 1);
   1.425 -		}
   1.426 -		t = u[j] - t;
   1.427 -		u[j] = LHALF(t);
   1.428 -		/*
   1.429 -		 * D5: test remainder.
   1.430 -		 * There is a borrow if and only if HHALF(t) is nonzero;
   1.431 -		 * in that (rare) case, qhat was too large (by exactly 1).
   1.432 -		 * Fix it by adding v[1..n] to u[j..j+n].
   1.433 -		 */
   1.434 -		if (HHALF(t)) {
   1.435 -			qhat--;
   1.436 -			for (t = 0, i = n; i > 0; i--) { /* D6: add back. */
   1.437 -				t += u[i + j] + v[i];
   1.438 -				u[i + j] = LHALF(t);
   1.439 -				t = HHALF(t);
   1.440 -			}
   1.441 -			u[j] = LHALF(u[j] + t);
   1.442 -		}
   1.443 -		q[j] = qhat;
   1.444 -	} while (++j <= m);		/* D7: loop on j. */
   1.445 +        /*
   1.446 +         * D3: Calculate qhat (\^q, in TeX notation).
   1.447 +         * Let qhat = min((u[j]*B + u[j+1])/v[1], B-1), and
   1.448 +         * let rhat = (u[j]*B + u[j+1]) mod v[1].
   1.449 +         * While rhat < B and v[2]*qhat > rhat*B+u[j+2],
   1.450 +         * decrement qhat and increase rhat correspondingly.
   1.451 +         * Note that if rhat >= B, v[2]*qhat < rhat*B.
   1.452 +         */
   1.453 +        uj0 = u[j + 0]; /* for D3 only -- note that u[j+...] change */
   1.454 +        uj1 = u[j + 1]; /* for D3 only */
   1.455 +        uj2 = u[j + 2]; /* for D3 only */
   1.456 +        if (uj0 == v1) {
   1.457 +            qhat = B;
   1.458 +            rhat = uj1;
   1.459 +            goto qhat_too_big;
   1.460 +        } else {
   1.461 +            u_long nn = COMBINE(uj0, uj1);
   1.462 +            qhat = nn / v1;
   1.463 +            rhat = nn % v1;
   1.464 +        }
   1.465 +        while (v2 * qhat > COMBINE(rhat, uj2)) {
   1.466 +        qhat_too_big:
   1.467 +            qhat--;
   1.468 +            if ((rhat += v1) >= B)
   1.469 +                break;
   1.470 +        }
   1.471 +        /*
   1.472 +         * D4: Multiply and subtract.
   1.473 +         * The variable `t' holds any borrows across the loop.
   1.474 +         * We split this up so that we do not require v[0] = 0,
   1.475 +         * and to eliminate a final special case.
   1.476 +         */
   1.477 +        for (t = 0, i = n; i > 0; i--) {
   1.478 +            t = u[i + j] - v[i] * qhat - t;
   1.479 +            u[i + j] = LHALF(t);
   1.480 +            t = (B - HHALF(t)) & (B - 1);
   1.481 +        }
   1.482 +        t = u[j] - t;
   1.483 +        u[j] = LHALF(t);
   1.484 +        /*
   1.485 +         * D5: test remainder.
   1.486 +         * There is a borrow if and only if HHALF(t) is nonzero;
   1.487 +         * in that (rare) case, qhat was too large (by exactly 1).
   1.488 +         * Fix it by adding v[1..n] to u[j..j+n].
   1.489 +         */
   1.490 +        if (HHALF(t)) {
   1.491 +            qhat--;
   1.492 +            for (t = 0, i = n; i > 0; i--) { /* D6: add back. */
   1.493 +                t += u[i + j] + v[i];
   1.494 +                u[i + j] = LHALF(t);
   1.495 +                t = HHALF(t);
   1.496 +            }
   1.497 +            u[j] = LHALF(u[j] + t);
   1.498 +        }
   1.499 +        q[j] = qhat;
   1.500 +    } while (++j <= m);  /* D7: loop on j. */
   1.501  
   1.502 -	/*
   1.503 -	 * If caller wants the remainder, we have to calculate it as
   1.504 -	 * u[m..m+n] >> d (this is at most n digits and thus fits in
   1.505 -	 * u[m+1..m+n], but we may need more source digits).
   1.506 -	 */
   1.507 -	if (arq) {
   1.508 -		if (d) {
   1.509 -			for (i = m + n; i > m; --i)
   1.510 -				u[i] = (u[i] >> d) |
   1.511 -				    LHALF(u[i - 1] << (HALF_BITS - d));
   1.512 -			u[i] = 0;
   1.513 -		}
   1.514 -		tmp.ul[H] = COMBINE(uspace[1], uspace[2]);
   1.515 -		tmp.ul[L] = COMBINE(uspace[3], uspace[4]);
   1.516 -		*arq = tmp.q;
   1.517 -	}
   1.518 +    /*
   1.519 +     * If caller wants the remainder, we have to calculate it as
   1.520 +     * u[m..m+n] >> d (this is at most n digits and thus fits in
   1.521 +     * u[m+1..m+n], but we may need more source digits).
   1.522 +     */
   1.523 +    if (arq) {
   1.524 +        if (d) {
   1.525 +            for (i = m + n; i > m; --i)
   1.526 +                u[i] = (u[i] >> d) |
   1.527 +                    LHALF(u[i - 1] << (HALF_BITS - d));
   1.528 +            u[i] = 0;
   1.529 +        }
   1.530 +        tmp.ul[H] = COMBINE(uspace[1], uspace[2]);
   1.531 +        tmp.ul[L] = COMBINE(uspace[3], uspace[4]);
   1.532 +        *arq = tmp.q;
   1.533 +    }
   1.534  
   1.535 -	tmp.ul[H] = COMBINE(qspace[1], qspace[2]);
   1.536 -	tmp.ul[L] = COMBINE(qspace[3], qspace[4]);
   1.537 -	return (tmp.q);
   1.538 +    tmp.ul[H] = COMBINE(qspace[1], qspace[2]);
   1.539 +    tmp.ul[L] = COMBINE(qspace[3], qspace[4]);
   1.540 +    return (tmp.q);
   1.541  }
   1.542  
   1.543  /*
   1.544   * Divide two signed quads.
   1.545 - * ??? if -1/2 should produce -1 on this machine, this code is wrong
   1.546 - * (Grzegorz Milos) Note for the above: -1/2 is 0. And so it should.
   1.547 + * Truncates towards zero, as required by C99.
   1.548   */
   1.549 -s64
   1.550 -__divdi3(s64 a, s64 b)
   1.551 +s64 __divdi3(s64 a, s64 b)
   1.552  {
   1.553 -	u64 ua, ub, uq;
   1.554 -	int neg;
   1.555 -
   1.556 -	if (a < 0)
   1.557 -		ua = -(u64)a, neg = 1;
   1.558 -	else
   1.559 -		ua = a, neg = 0;
   1.560 -	if (b < 0)
   1.561 -		ub = -(u64)b, neg ^= 1;
   1.562 -	else
   1.563 -		ub = b;
   1.564 -	uq = __qdivrem(ua, ub, (u64 *)0);
   1.565 -	return (neg ? -uq : uq);
   1.566 +    u64 ua, ub, uq;
   1.567 +    int neg = (a < 0) ^ (b < 0);
   1.568 +    ua = (a < 0) ? -(u64)a : a;
   1.569 +    ub = (b < 0) ? -(u64)b : b;
   1.570 +    uq = __qdivrem(ua, ub, (u64 *)0);
   1.571 +    return (neg ? -uq : uq);
   1.572  }
   1.573  
   1.574  
   1.575  /*
   1.576   * Divide two unsigned quads.
   1.577   */
   1.578 -u64
   1.579 -__udivdi3(u64 a, u64 b)
   1.580 +u64 __udivdi3(u64 a, u64 b)
   1.581  {
   1.582 -
   1.583 -        return (__qdivrem(a, b, (u64 *)0));
   1.584 +    return __qdivrem(a, b, (u64 *)0);
   1.585  }
   1.586  
   1.587  /*
   1.588 @@ -392,87 +369,66 @@ u64
   1.589   */
   1.590  u64 __umoddi3(u64 a, u64 b)
   1.591  {
   1.592 -	u64 rem;
   1.593 -	__qdivrem(a, b, &rem);
   1.594 -	return rem;
   1.595 +    u64 rem;
   1.596 +    __qdivrem(a, b, &rem);
   1.597 +    return rem;
   1.598  }
   1.599  
   1.600  /*
   1.601   * Remainder of signed quad division.
   1.602 - * The result of mod is not always equal to division
   1.603 - * remainder. The following example shows the result for all
   1.604 - * four possible cases:
   1.605 + * Truncates towards zero, as required by C99:
   1.606   *  11 %  5 =  1
   1.607 - * -11 %  5 =  4
   1.608 - *  11 % -5 = -4
   1.609 - * -11 % -5 = -1
   1.610 + * -11 %  5 = -1
   1.611 + *  11 % -5 =  1
   1.612 + * -11 % -5 =  1
   1.613   */
   1.614  s64 __moddi3(s64 a, s64 b)
   1.615  {
   1.616 -	u64 ua, ub, urem;
   1.617 -	int neg1, neg2;
   1.618 -
   1.619 -	if (a < 0)
   1.620 -		ua = -(u64)a, neg1 = 1;
   1.621 -	else
   1.622 -		ua = a, neg1 = 0;
   1.623 -        
   1.624 -	if (b < 0)
   1.625 -		ub = -(u64)b, neg2 = 1;
   1.626 -	else
   1.627 -		ub = b, neg2 = 0;
   1.628 -	__qdivrem(ua, ub, &urem);
   1.629 -    
   1.630 -	/* There 4 different cases: */
   1.631 -	if (neg1) {
   1.632 -		if (neg2)
   1.633 -			return -urem;
   1.634 -		else
   1.635 -			return ub - urem;
   1.636 -	} else {
   1.637 -		if (neg2)
   1.638 -			return -ub + urem;
   1.639 -		else
   1.640 -			return urem;
   1.641 -	}
   1.642 +    u64 ua, ub, urem;
   1.643 +    int neg = (a < 0);
   1.644 +    ua = neg ? -(u64)a : a;
   1.645 +    ub = (b < 0) ? -(u64)b : b;
   1.646 +    __qdivrem(ua, ub, &urem);
   1.647 +    return (neg ? -urem : urem);
   1.648  }
   1.649  
   1.650  #endif /* BITS_PER_LONG == 32 */
   1.651  
   1.652  unsigned long long parse_size_and_unit(const char *s, const char **ps)
   1.653  {
   1.654 -	unsigned long long ret;
   1.655 -	const char *s1;
   1.656 +    unsigned long long ret;
   1.657 +    const char *s1;
   1.658  
   1.659 -	ret = simple_strtoull(s, &s1, 0);
   1.660 +    ret = simple_strtoull(s, &s1, 0);
   1.661  
   1.662 -	switch (*s1) {
   1.663 -	case 'G': case 'g':
   1.664 -		ret <<= 10;
   1.665 -	case 'M': case 'm':
   1.666 -		ret <<= 10;
   1.667 -	case 'K': case 'k':
   1.668 -		ret <<= 10;
   1.669 -	case 'B': case 'b':
   1.670 -		s1++;
   1.671 -		break;
   1.672 -	default:
   1.673 -		ret <<= 10; /* default to kB */
   1.674 -		break;
   1.675 -	}
   1.676 +    switch ( *s1 )
   1.677 +    {
   1.678 +    case 'G': case 'g':
   1.679 +        ret <<= 10;
   1.680 +    case 'M': case 'm':
   1.681 +        ret <<= 10;
   1.682 +    case 'K': case 'k':
   1.683 +        ret <<= 10;
   1.684 +    case 'B': case 'b':
   1.685 +        s1++;
   1.686 +        break;
   1.687 +    default:
   1.688 +        ret <<= 10; /* default to kB */
   1.689 +        break;
   1.690 +    }
   1.691  
   1.692 -	if (ps != NULL)
   1.693 -		*ps = s1;
   1.694 +    if ( ps != NULL )
   1.695 +        *ps = s1;
   1.696  
   1.697 -	return ret;
   1.698 +    return ret;
   1.699  }
   1.700  
   1.701  /*
   1.702   * Local variables:
   1.703   * mode: C
   1.704   * c-set-style: "BSD"
   1.705 - * c-basic-offset: 8
   1.706 - * tab-width: 8
   1.707 - * indent-tabs-mode: t
   1.708 + * c-basic-offset: 4
   1.709 + * tab-width: 4
   1.710 + * indent-tabs-mode: nil
   1.711   * End:
   1.712   */