Merge branch 'akpm' (Andrew's patch-bomb)

Merge misc patches from Andrew Morton: - the "misc" tree - stuff from all over the map - checkpatch updates - fatfs - kmod changes - procfs - cpumask - UML - kexec - mqueue - rapidio - pidns - some checkpoint-restore feature work. Reluctantly. Most of it delayed a release. I'm still rather worried that we don't have a clear roadmap to completion for this work. * emailed from Andrew Morton <akpm@linux-foundation.org>: (78 patches) kconfig: update compression algorithm info c/r: prctl: add ability to set new mm_struct::exe_file c/r: prctl: extend PR_SET_MM to set up more mm_struct entries c/r: procfs: add arg_start/end, env_start/end and exit_code members to /proc/$pid/stat syscalls, x86: add __NR_kcmp syscall fs, proc: introduce /proc/<pid>/task/<tid>/children entry sysctl: make kernel.ns_last_pid control dependent on CHECKPOINT_RESTORE aio/vfs: cleanup of rw_copy_check_uvector() and compat_rw_copy_check_uvector() eventfd: change int to __u64 in eventfd_signal() fs/nls: add Apple NLS pidns: make killed children autoreap pidns: use task_active_pid_ns in do_notify_parent rapidio/tsi721: add DMA engine support rapidio: add DMA engine support for RIO data transfers ipc/mqueue: add rbtree node caching support tools/selftests: add mq_perf_tests ipc/mqueue: strengthen checks on mqueue creation ipc/mqueue: correct mq_attr_ok test ipc/mqueue: improve performance of send/recv selftests: add mq_open_tests ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2012-05-31 21:10:18 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-05-31 21:10:18 -0400
commit: 08615d7d85e5aa02c05bf6c4dde87d940e7f85f6 (patch)
tree: 18906149d313d25914160aca21cedf54b3a7e818 /lib
parent: 9fdadb2cbaf4b482dfd6086e8bd3d2db071a1702 (diff)
parent: 0a4dd35c67b144d8ef9432120105f1aab9293ee9 (diff)
1 files changed, 195 insertions, 94 deletions
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 5391299c1e7..c3f36d415bd 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -112,106 +112,199 @@ int skip_atoi(const char **s)
 /* Decimal conversion is by far the most typical, and is used
 * for /proc and /sys data. This directly impacts e.g. top performance
 * with many processes running. We optimize it for speed
- * using code from
+ * using ideas described at <http://www.cs.uiowa.edu/~jones/bcd/divide.html>
- * http://www.cs.uiowa.edu/~jones/bcd/decimal.html
+ * (with permission from the author, Douglas W. Jones).
- * (with permission from the author, Douglas W. Jones). */
+ */
-/* Formats correctly any integer in [0,99999].
+#if BITS_PER_LONG != 32 || BITS_PER_LONG_LONG != 64
- * Outputs from one to five digits depending on input.
+/* Formats correctly any integer in [0, 999999999] */
- * On i386 gcc 4.1.2 -O2: ~250 bytes of code. */
 static noinline_for_stack
-char *put_dec_trunc(char *buf, unsigned q)
+char *put_dec_full9(char *buf, unsigned q)
 {
-        unsigned d3, d2, d1, d0;
+        unsigned r;
-        d1 = (q>>4) & 0xf;
-        d2 = (q>>8) & 0xf;
-        d3 = (q>>12);
-        d0 = 6*(d3 + d2 + d1) + (q & 0xf);
-        q = (d0 * 0xcd) >> 11;
-        d0 = d0 - 10*q;
-        *buf++ = d0 + '0'; /* least significant digit */
-        d1 = q + 9*d3 + 5*d2 + d1;
-        if (d1 != 0) {
-                q = (d1 * 0xcd) >> 11;
-                d1 = d1 - 10*q;
-                *buf++ = d1 + '0'; /* next digit */
-                d2 = q + 2*d2;
-                if ((d2 != 0) || (d3 != 0)) {
-                        q = (d2 * 0xd) >> 7;
-                        d2 = d2 - 10*q;
-                        *buf++ = d2 + '0'; /* next digit */
-                        d3 = q + 4*d3;
-                        if (d3 != 0) {
-                                q = (d3 * 0xcd) >> 11;
-                                d3 = d3 - 10*q;
-                                *buf++ = d3 + '0';  /* next digit */
-                                if (q != 0)
-                                        *buf++ = q + '0'; /* most sign. digit */
-                        }
-                }
-        }
+        /*
+         * Possible ways to approx. divide by 10
+         * (x * 0x1999999a) >> 32 x < 1073741829 (multiply must be 64-bit)
+         * (x * 0xcccd) >> 19     x <      81920 (x < 262149 when 64-bit mul)
+         * (x * 0x6667) >> 18     x <      43699
+         * (x * 0x3334) >> 17     x <      16389
+         * (x * 0x199a) >> 16     x <      16389
+         * (x * 0x0ccd) >> 15     x <      16389
+         * (x * 0x0667) >> 14     x <       2739
+         * (x * 0x0334) >> 13     x <       1029
+         * (x * 0x019a) >> 12     x <       1029
+         * (x * 0x00cd) >> 11     x <       1029 shorter code than * 0x67 (on i386)
+         * (x * 0x0067) >> 10     x <        179
+         * (x * 0x0034) >>  9     x <         69 same
+         * (x * 0x001a) >>  8     x <         69 same
+         * (x * 0x000d) >>  7     x <         69 same, shortest code (on i386)
+         * (x * 0x0007) >>  6     x <         19
+         * See <http://www.cs.uiowa.edu/~jones/bcd/divide.html>
+         */
+        r      = (q * (uint64_t)0x1999999a) >> 32;
+        *buf++ = (q - 10 * r) + '0'; /* 1 */
+        q      = (r * (uint64_t)0x1999999a) >> 32;
+        *buf++ = (r - 10 * q) + '0'; /* 2 */
+        r      = (q * (uint64_t)0x1999999a) >> 32;
+        *buf++ = (q - 10 * r) + '0'; /* 3 */
+        q      = (r * (uint64_t)0x1999999a) >> 32;
+        *buf++ = (r - 10 * q) + '0'; /* 4 */
+        r      = (q * (uint64_t)0x1999999a) >> 32;
+        *buf++ = (q - 10 * r) + '0'; /* 5 */
+        /* Now value is under 10000, can avoid 64-bit multiply */
+        q      = (r * 0x199a) >> 16;
+        *buf++ = (r - 10 * q)  + '0'; /* 6 */
+        r      = (q * 0xcd) >> 11;
+        *buf++ = (q - 10 * r)  + '0'; /* 7 */
+        q      = (r * 0xcd) >> 11;
+        *buf++ = (r - 10 * q) + '0'; /* 8 */
+        *buf++ = q + '0'; /* 9 */
        return buf;
 }
-/* Same with if's removed. Always emits five digits */
+#endif
+/* Similar to above but do not pad with zeros.
+ * Code can be easily arranged to print 9 digits too, but our callers
+ * always call put_dec_full9() instead when the number has 9 decimal digits.
+ */
 static noinline_for_stack
-char *put_dec_full(char *buf, unsigned q)
+char *put_dec_trunc8(char *buf, unsigned r)
 {
-        /* BTW, if q is in [0,9999], 8-bit ints will be enough, */
+        unsigned q;
-        /* but anyway, gcc produces better code with full-sized ints */
-        unsigned d3, d2, d1, d0;
+        /* Copy of previous function's body with added early returns */
-        d1 = (q>>4) & 0xf;
+        q      = (r * (uint64_t)0x1999999a) >> 32;
-        d2 = (q>>8) & 0xf;
+        *buf++ = (r - 10 * q) + '0'; /* 2 */
-        d3 = (q>>12);
+        if (q == 0)
+                return buf;
+        r      = (q * (uint64_t)0x1999999a) >> 32;
+        *buf++ = (q - 10 * r) + '0'; /* 3 */
+        if (r == 0)
+                return buf;
+        q      = (r * (uint64_t)0x1999999a) >> 32;
+        *buf++ = (r - 10 * q) + '0'; /* 4 */
+        if (q == 0)
+                return buf;
+        r      = (q * (uint64_t)0x1999999a) >> 32;
+        *buf++ = (q - 10 * r) + '0'; /* 5 */
+        if (r == 0)
+                return buf;
+        q      = (r * 0x199a) >> 16;
+        *buf++ = (r - 10 * q)  + '0'; /* 6 */
+        if (q == 0)
+                return buf;
+        r      = (q * 0xcd) >> 11;
+        *buf++ = (q - 10 * r)  + '0'; /* 7 */
+        if (r == 0)
+                return buf;
+        q      = (r * 0xcd) >> 11;
+        *buf++ = (r - 10 * q) + '0'; /* 8 */
+        if (q == 0)
+                return buf;
+        *buf++ = q + '0'; /* 9 */
+        return buf;
+}
-        /*
+/* There are two algorithms to print larger numbers.
-         * Possible ways to approx. divide by 10
+ * One is generic: divide by 1000000000 and repeatedly print
-         * gcc -O2 replaces multiply with shifts and adds
+ * groups of (up to) 9 digits. It's conceptually simple,
-         * (x * 0xcd) >> 11: 11001101 - shorter code than * 0x67 (on i386)
+ * but requires a (unsigned long long) / 1000000000 division.
-         * (x * 0x67) >> 10:  1100111
+ *
-         * (x * 0x34) >> 9:    110100 - same
+ * Second algorithm splits 64-bit unsigned long long into 16-bit chunks,
-         * (x * 0x1a) >> 8:     11010 - same
+ * manipulates them cleverly and generates groups of 4 decimal digits.
-         * (x * 0x0d) >> 7:      1101 - same, shortest code (on i386)
+ * It so happens that it does NOT require long long division.
-         */
+ *
-        d0 = 6*(d3 + d2 + d1) + (q & 0xf);
+ * If long is > 32 bits, division of 64-bit values is relatively easy,
-        q = (d0 * 0xcd) >> 11;
+ * and we will use the first algorithm.
-        d0 = d0 - 10*q;
+ * If long long is > 64 bits (strange architecture with VERY large long long),
-        *buf++ = d0 + '0';
+ * second algorithm can't be used, and we again use the first one.
-        d1 = q + 9*d3 + 5*d2 + d1;
+ *
-                q = (d1 * 0xcd) >> 11;
+ * Else (if long is 32 bits and long long is 64 bits) we use second one.
-                d1 = d1 - 10*q;
+ */
-                *buf++ = d1 + '0';
-                d2 = q + 2*d2;
-                        q = (d2 * 0xd) >> 7;
-                        d2 = d2 - 10*q;
-                        *buf++ = d2 + '0';
-                        d3 = q + 4*d3;
-                                q = (d3 * 0xcd) >> 11; /* - shorter code */
-                                /* q = (d3 * 0x67) >> 10; - would also work */
-                                d3 = d3 - 10*q;
-                                *buf++ = d3 + '0';
-                                        *buf++ = q + '0';
-        return buf;
+#if BITS_PER_LONG != 32 || BITS_PER_LONG_LONG != 64
+/* First algorithm: generic */
+static
+char *put_dec(char *buf, unsigned long long n)
+{
+        if (n >= 100*1000*1000) {
+                while (n >= 1000*1000*1000)
+                        buf = put_dec_full9(buf, do_div(n, 1000*1000*1000));
+                if (n >= 100*1000*1000)
+                        return put_dec_full9(buf, n);
+        }
+        return put_dec_trunc8(buf, n);
 }
-/* No inlining helps gcc to use registers better */
+#else
+/* Second algorithm: valid only for 64-bit long longs */
 static noinline_for_stack
-char *put_dec(char *buf, unsigned long long num)
+char *put_dec_full4(char *buf, unsigned q)
 {
-        while (1) {
+        unsigned r;
-                unsigned rem;
+        r      = (q * 0xcccd) >> 19;
-                if (num < 100000)
+        *buf++ = (q - 10 * r) + '0';
-                        return put_dec_trunc(buf, num);
+        q      = (r * 0x199a) >> 16;
-                rem = do_div(num, 100000);
+        *buf++ = (r - 10 * q)  + '0';
-                buf = put_dec_full(buf, rem);
+        r      = (q * 0xcd) >> 11;
-        }
+        *buf++ = (q - 10 * r)  + '0';
+        *buf++ = r + '0';
+        return buf;
 }
+/* Based on code by Douglas W. Jones found at
+ * <http://www.cs.uiowa.edu/~jones/bcd/decimal.html#sixtyfour>
+ * (with permission from the author).
+ * Performs no 64-bit division and hence should be fast on 32-bit machines.
+ */
+static
+char *put_dec(char *buf, unsigned long long n)
+{
+        uint32_t d3, d2, d1, q, h;
+        if (n < 100*1000*1000)
+                return put_dec_trunc8(buf, n);
+        d1  = ((uint32_t)n >> 16); /* implicit "& 0xffff" */
+        h   = (n >> 32);
+        d2  = (h      ) & 0xffff;
+        d3  = (h >> 16); /* implicit "& 0xffff" */
+        q   = 656 * d3 + 7296 * d2 + 5536 * d1 + ((uint32_t)n & 0xffff);
+        buf = put_dec_full4(buf, q % 10000);
+        q   = q / 10000;
+        d1  = q + 7671 * d3 + 9496 * d2 + 6 * d1;
+        buf = put_dec_full4(buf, d1 % 10000);
+        q   = d1 / 10000;
+        d2  = q + 4749 * d3 + 42 * d2;
+        buf = put_dec_full4(buf, d2 % 10000);
+        q   = d2 / 10000;
+        d3  = q + 281 * d3;
+        if (!d3)
+                goto done;
+        buf = put_dec_full4(buf, d3 % 10000);
+        q   = d3 / 10000;
+        if (!q)
+                goto done;
+        buf = put_dec_full4(buf, q);
+ done:
+        while (buf[-1] == '0')
+                --buf;
+        return buf;
+}
+#endif
 /*
 * Convert passed number to decimal string.
 * Returns the length of string.  On buffer overflow, returns 0.
@@ -220,16 +313,22 @@ char *put_dec(char *buf, unsigned long long num)
 */
 int num_to_str(char *buf, int size, unsigned long long num)
 {
-        char tmp[21];           /* Enough for 2^64 in decimal */
+        char tmp[sizeof(num) * 3];
        int idx, len;
-        len = put_dec(tmp, num) - tmp;
+        /* put_dec() may work incorrectly for num = 0 (generate "", not "0") */
+        if (num <= 9) {
+                tmp[0] = '0' + num;
+                len = 1;
+        } else {
+                len = put_dec(tmp, num) - tmp;
+        }
        if (len > size)
                return 0;
        for (idx = 0; idx < len; ++idx)
                buf[idx] = tmp[len - idx - 1];
-        return  len;
+        return len;
 }
 #define ZEROPAD 1               /* pad with zero */
@@ -314,8 +413,8 @@ char *number(char *buf, char *end, unsigned long long num,
        /* generate full string in tmp[], in reverse order */
        i = 0;
-        if (num == 0)
+        if (num < spec.base)
-                tmp[i++] = '0';
+                tmp[i++] = digits[num] | locase;
        /* Generic code, for any base:
        else do {
                tmp[i++] = (digits[do_div(num,base)] | locase);
@@ -611,7 +710,7 @@ char *ip4_string(char *p, const u8 *addr, const char *fmt)
        }
        for (i = 0; i < 4; i++) {
                char temp[3];   /* hold each IP quad in reverse order */
-                int digits = put_dec_trunc(temp, addr[index]) - temp;
+                int digits = put_dec_trunc8(temp, addr[index]) - temp;
                if (leading_zeros) {
                        if (digits < 3)
                                *p++ = '0';
@@ -870,13 +969,15 @@ static noinline_for_stack
 char *pointer(const char *fmt, char *buf, char *end, void *ptr,
              struct printf_spec spec)
 {
+        int default_width = 2 * sizeof(void *) + (spec.flags & SPECIAL ? 2 : 0);
        if (!ptr && *fmt != 'K') {
                /*
                 * Print (null) with the same width as a pointer so it makes
                 * tabular output look nice.
                 */
                if (spec.field_width == -1)
-                        spec.field_width = 2 * sizeof(void *);
+                        spec.field_width = default_width;
                return string(buf, end, "(null)", spec);
        }
@@ -931,7 +1032,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
                 */
                if (in_irq() || in_serving_softirq() || in_nmi()) {
                        if (spec.field_width == -1)
-                                spec.field_width = 2 * sizeof(void *);
+                                spec.field_width = default_width;
                        return string(buf, end, "pK-error", spec);
                }
                if (!((kptr_restrict == 0) ||
@@ -948,7 +1049,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
        }
        spec.flags |= SMALL;
        if (spec.field_width == -1) {
-                spec.field_width = 2 * sizeof(void *);
+                spec.field_width = default_width;
                spec.flags |= ZEROPAD;
        }
        spec.base = 16;
author	Linus Torvalds <torvalds@linux-foundation.org>	2012-05-31 21:10:18 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-05-31 21:10:18 -0400
commit	08615d7d85e5aa02c05bf6c4dde87d940e7f85f6 (patch)
tree	18906149d313d25914160aca21cedf54b3a7e818 /lib
parent	9fdadb2cbaf4b482dfd6086e8bd3d2db071a1702 (diff)
parent	0a4dd35c67b144d8ef9432120105f1aab9293ee9 (diff)

diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 5391299c1e7..c3f36d415bd 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c
@@ -112,106 +112,199 @@ int skip_atoi(const char **s)
112	/* Decimal conversion is by far the most typical, and is used	112	/* Decimal conversion is by far the most typical, and is used
113	* for /proc and /sys data. This directly impacts e.g. top performance	113	* for /proc and /sys data. This directly impacts e.g. top performance
114	* with many processes running. We optimize it for speed	114	* with many processes running. We optimize it for speed
115	* using code from	115	* using ideas described at <http://www.cs.uiowa.edu/~jones/bcd/divide.html>
116	* http://www.cs.uiowa.edu/~jones/bcd/decimal.html	116	* (with permission from the author, Douglas W. Jones).
117	* (with permission from the author, Douglas W. Jones). */	117	*/
118		118
119	/* Formats correctly any integer in [0,99999].	119	#if BITS_PER_LONG != 32 \|\| BITS_PER_LONG_LONG != 64
120	* Outputs from one to five digits depending on input.	120	/* Formats correctly any integer in [0, 999999999] */
121	* On i386 gcc 4.1.2 -O2: ~250 bytes of code. */
122	static noinline_for_stack	121	static noinline_for_stack
123	char put_dec_trunc(char buf, unsigned q)	122	char put_dec_full9(char buf, unsigned q)
124	{	123	{
125	unsigned d3, d2, d1, d0;	124	unsigned r;
126	d1 = (q>>4) & 0xf;
127	d2 = (q>>8) & 0xf;
128	d3 = (q>>12);
129
130	d0 = 6*(d3 + d2 + d1) + (q & 0xf);
131	q = (d0 * 0xcd) >> 11;
132	d0 = d0 - 10*q;
133	buf++ = d0 + '0'; / least significant digit */
134	d1 = q + 9d3 + 5d2 + d1;
135	if (d1 != 0) {
136	q = (d1 * 0xcd) >> 11;
137	d1 = d1 - 10*q;
138	buf++ = d1 + '0'; / next digit */
139
140	d2 = q + 2*d2;
141	if ((d2 != 0) \|\| (d3 != 0)) {
142	q = (d2 * 0xd) >> 7;
143	d2 = d2 - 10*q;
144	buf++ = d2 + '0'; / next digit */
145
146	d3 = q + 4*d3;
147	if (d3 != 0) {
148	q = (d3 * 0xcd) >> 11;
149	d3 = d3 - 10*q;
150	buf++ = d3 + '0'; / next digit */
151	if (q != 0)
152	buf++ = q + '0'; / most sign. digit */
153	}
154	}
155	}
156		125
		126	/*
		127	* Possible ways to approx. divide by 10
		128	* (x * 0x1999999a) >> 32 x < 1073741829 (multiply must be 64-bit)
		129	* (x * 0xcccd) >> 19 x < 81920 (x < 262149 when 64-bit mul)
		130	* (x * 0x6667) >> 18 x < 43699
		131	* (x * 0x3334) >> 17 x < 16389
		132	* (x * 0x199a) >> 16 x < 16389
		133	* (x * 0x0ccd) >> 15 x < 16389
		134	* (x * 0x0667) >> 14 x < 2739
		135	* (x * 0x0334) >> 13 x < 1029
		136	* (x * 0x019a) >> 12 x < 1029
		137	* (x * 0x00cd) >> 11 x < 1029 shorter code than * 0x67 (on i386)
		138	* (x * 0x0067) >> 10 x < 179
		139	* (x * 0x0034) >> 9 x < 69 same
		140	* (x * 0x001a) >> 8 x < 69 same
		141	* (x * 0x000d) >> 7 x < 69 same, shortest code (on i386)
		142	* (x * 0x0007) >> 6 x < 19
		143	* See <http://www.cs.uiowa.edu/~jones/bcd/divide.html>
		144	*/
		145	r = (q * (uint64_t)0x1999999a) >> 32;
		146	buf++ = (q - 10 r) + '0'; /* 1 */
		147	q = (r * (uint64_t)0x1999999a) >> 32;
		148	buf++ = (r - 10 q) + '0'; /* 2 */
		149	r = (q * (uint64_t)0x1999999a) >> 32;
		150	buf++ = (q - 10 r) + '0'; /* 3 */
		151	q = (r * (uint64_t)0x1999999a) >> 32;
		152	buf++ = (r - 10 q) + '0'; /* 4 */
		153	r = (q * (uint64_t)0x1999999a) >> 32;
		154	buf++ = (q - 10 r) + '0'; /* 5 */
		155	/* Now value is under 10000, can avoid 64-bit multiply */
		156	q = (r * 0x199a) >> 16;
		157	buf++ = (r - 10 q) + '0'; /* 6 */
		158	r = (q * 0xcd) >> 11;
		159	buf++ = (q - 10 r) + '0'; /* 7 */
		160	q = (r * 0xcd) >> 11;
		161	buf++ = (r - 10 q) + '0'; /* 8 */
		162	buf++ = q + '0'; / 9 */
157	return buf;	163	return buf;
158	}	164	}
159	/* Same with if's removed. Always emits five digits */	165	#endif
		166
		167	/* Similar to above but do not pad with zeros.
		168	* Code can be easily arranged to print 9 digits too, but our callers
		169	* always call put_dec_full9() instead when the number has 9 decimal digits.
		170	*/
160	static noinline_for_stack	171	static noinline_for_stack
161	char put_dec_full(char buf, unsigned q)	172	char put_dec_trunc8(char buf, unsigned r)
162	{	173	{
163	/* BTW, if q is in [0,9999], 8-bit ints will be enough, */	174	unsigned q;
164	/* but anyway, gcc produces better code with full-sized ints */	175
165	unsigned d3, d2, d1, d0;	176	/* Copy of previous function's body with added early returns */
166	d1 = (q>>4) & 0xf;	177	q = (r * (uint64_t)0x1999999a) >> 32;
167	d2 = (q>>8) & 0xf;	178	buf++ = (r - 10 q) + '0'; /* 2 */
168	d3 = (q>>12);	179	if (q == 0)
		180	return buf;
		181	r = (q * (uint64_t)0x1999999a) >> 32;
		182	buf++ = (q - 10 r) + '0'; /* 3 */
		183	if (r == 0)
		184	return buf;
		185	q = (r * (uint64_t)0x1999999a) >> 32;
		186	buf++ = (r - 10 q) + '0'; /* 4 */
		187	if (q == 0)
		188	return buf;
		189	r = (q * (uint64_t)0x1999999a) >> 32;
		190	buf++ = (q - 10 r) + '0'; /* 5 */
		191	if (r == 0)
		192	return buf;
		193	q = (r * 0x199a) >> 16;
		194	buf++ = (r - 10 q) + '0'; /* 6 */
		195	if (q == 0)
		196	return buf;
		197	r = (q * 0xcd) >> 11;
		198	buf++ = (q - 10 r) + '0'; /* 7 */
		199	if (r == 0)
		200	return buf;
		201	q = (r * 0xcd) >> 11;
		202	buf++ = (r - 10 q) + '0'; /* 8 */
		203	if (q == 0)
		204	return buf;
		205	buf++ = q + '0'; / 9 */
		206	return buf;
		207	}
169		208
170	/*	209	/* There are two algorithms to print larger numbers.
171	* Possible ways to approx. divide by 10	210	* One is generic: divide by 1000000000 and repeatedly print
172	* gcc -O2 replaces multiply with shifts and adds	211	* groups of (up to) 9 digits. It's conceptually simple,
173	* (x * 0xcd) >> 11: 11001101 - shorter code than * 0x67 (on i386)	212	* but requires a (unsigned long long) / 1000000000 division.
174	* (x * 0x67) >> 10: 1100111	213	*
175	* (x * 0x34) >> 9: 110100 - same	214	* Second algorithm splits 64-bit unsigned long long into 16-bit chunks,
176	* (x * 0x1a) >> 8: 11010 - same	215	* manipulates them cleverly and generates groups of 4 decimal digits.
177	* (x * 0x0d) >> 7: 1101 - same, shortest code (on i386)	216	* It so happens that it does NOT require long long division.
178	*/	217	*
179	d0 = 6*(d3 + d2 + d1) + (q & 0xf);	218	* If long is > 32 bits, division of 64-bit values is relatively easy,
180	q = (d0 * 0xcd) >> 11;	219	* and we will use the first algorithm.
181	d0 = d0 - 10*q;	220	* If long long is > 64 bits (strange architecture with VERY large long long),
182	*buf++ = d0 + '0';	221	* second algorithm can't be used, and we again use the first one.
183	d1 = q + 9d3 + 5d2 + d1;	222	*
184	q = (d1 * 0xcd) >> 11;	223	* Else (if long is 32 bits and long long is 64 bits) we use second one.
185	d1 = d1 - 10*q;	224	*/
186	*buf++ = d1 + '0';
187
188	d2 = q + 2*d2;
189	q = (d2 * 0xd) >> 7;
190	d2 = d2 - 10*q;
191	*buf++ = d2 + '0';
192
193	d3 = q + 4*d3;
194	q = (d3 * 0xcd) >> 11; /* - shorter code */
195	/* q = (d3 * 0x67) >> 10; - would also work */
196	d3 = d3 - 10*q;
197	*buf++ = d3 + '0';
198	*buf++ = q + '0';
199		225
200	return buf;	226	#if BITS_PER_LONG != 32 \|\| BITS_PER_LONG_LONG != 64
		227
		228	/* First algorithm: generic */
		229
		230	static
		231	char put_dec(char buf, unsigned long long n)
		232	{
		233	if (n >= 10010001000) {
		234	while (n >= 100010001000)
		235	buf = put_dec_full9(buf, do_div(n, 100010001000));
		236	if (n >= 10010001000)
		237	return put_dec_full9(buf, n);
		238	}
		239	return put_dec_trunc8(buf, n);
201	}	240	}
202	/* No inlining helps gcc to use registers better */	241
		242	#else
		243
		244	/* Second algorithm: valid only for 64-bit long longs */
		245
203	static noinline_for_stack	246	static noinline_for_stack
204	char put_dec(char buf, unsigned long long num)	247	char put_dec_full4(char buf, unsigned q)
205	{	248	{
206	while (1) {	249	unsigned r;
207	unsigned rem;	250	r = (q * 0xcccd) >> 19;
208	if (num < 100000)	251	buf++ = (q - 10 r) + '0';
209	return put_dec_trunc(buf, num);	252	q = (r * 0x199a) >> 16;
210	rem = do_div(num, 100000);	253	buf++ = (r - 10 q) + '0';
211	buf = put_dec_full(buf, rem);	254	r = (q * 0xcd) >> 11;
212	}	255	buf++ = (q - 10 r) + '0';
		256	*buf++ = r + '0';
		257	return buf;
213	}	258	}
214		259
		260	/* Based on code by Douglas W. Jones found at
		261	* <http://www.cs.uiowa.edu/~jones/bcd/decimal.html#sixtyfour>
		262	* (with permission from the author).
		263	* Performs no 64-bit division and hence should be fast on 32-bit machines.
		264	*/
		265	static
		266	char put_dec(char buf, unsigned long long n)
		267	{
		268	uint32_t d3, d2, d1, q, h;
		269
		270	if (n < 10010001000)
		271	return put_dec_trunc8(buf, n);
		272
		273	d1 = ((uint32_t)n >> 16); /* implicit "& 0xffff" */
		274	h = (n >> 32);
		275	d2 = (h ) & 0xffff;
		276	d3 = (h >> 16); /* implicit "& 0xffff" */
		277
		278	q = 656 * d3 + 7296 * d2 + 5536 * d1 + ((uint32_t)n & 0xffff);
		279
		280	buf = put_dec_full4(buf, q % 10000);
		281	q = q / 10000;
		282
		283	d1 = q + 7671 * d3 + 9496 * d2 + 6 * d1;
		284	buf = put_dec_full4(buf, d1 % 10000);
		285	q = d1 / 10000;
		286
		287	d2 = q + 4749 * d3 + 42 * d2;
		288	buf = put_dec_full4(buf, d2 % 10000);
		289	q = d2 / 10000;
		290
		291	d3 = q + 281 * d3;
		292	if (!d3)
		293	goto done;
		294	buf = put_dec_full4(buf, d3 % 10000);
		295	q = d3 / 10000;
		296	if (!q)
		297	goto done;
		298	buf = put_dec_full4(buf, q);
		299	done:
		300	while (buf[-1] == '0')
		301	--buf;
		302
		303	return buf;
		304	}
		305
		306	#endif
		307
215	/*	308	/*
216	* Convert passed number to decimal string.	309	* Convert passed number to decimal string.
217	* Returns the length of string. On buffer overflow, returns 0.	310	* Returns the length of string. On buffer overflow, returns 0.
@@ -220,16 +313,22 @@ char put_dec(char buf, unsigned long long num)
220	*/	313	*/
221	int num_to_str(char *buf, int size, unsigned long long num)	314	int num_to_str(char *buf, int size, unsigned long long num)
222	{	315	{
223	char tmp[21]; /* Enough for 2^64 in decimal */	316	char tmp[sizeof(num) * 3];
224	int idx, len;	317	int idx, len;
225		318
226	len = put_dec(tmp, num) - tmp;	319	/* put_dec() may work incorrectly for num = 0 (generate "", not "0") */
		320	if (num <= 9) {
		321	tmp[0] = '0' + num;
		322	len = 1;
		323	} else {
		324	len = put_dec(tmp, num) - tmp;
		325	}
227		326
228	if (len > size)	327	if (len > size)
229	return 0;	328	return 0;
230	for (idx = 0; idx < len; ++idx)	329	for (idx = 0; idx < len; ++idx)
231	buf[idx] = tmp[len - idx - 1];	330	buf[idx] = tmp[len - idx - 1];
232	return len;	331	return len;
233	}	332	}
234		333
235	#define ZEROPAD 1 /* pad with zero */	334	#define ZEROPAD 1 /* pad with zero */
@@ -314,8 +413,8 @@ char number(char buf, char *end, unsigned long long num,
314		413
315	/* generate full string in tmp[], in reverse order */	414	/* generate full string in tmp[], in reverse order */
316	i = 0;	415	i = 0;
317	if (num == 0)	416	if (num < spec.base)
318	tmp[i++] = '0';	417	tmp[i++] = digits[num] \| locase;
319	/* Generic code, for any base:	418	/* Generic code, for any base:
320	else do {	419	else do {
321	tmp[i++] = (digits[do_div(num,base)] \| locase);	420	tmp[i++] = (digits[do_div(num,base)] \| locase);
@@ -611,7 +710,7 @@ char ip4_string(char p, const u8 addr, const char fmt)
611	}	710	}
612	for (i = 0; i < 4; i++) {	711	for (i = 0; i < 4; i++) {
613	char temp[3]; /* hold each IP quad in reverse order */	712	char temp[3]; /* hold each IP quad in reverse order */
614	int digits = put_dec_trunc(temp, addr[index]) - temp;	713	int digits = put_dec_trunc8(temp, addr[index]) - temp;
615	if (leading_zeros) {	714	if (leading_zeros) {
616	if (digits < 3)	715	if (digits < 3)
617	*p++ = '0';	716	*p++ = '0';
@@ -870,13 +969,15 @@ static noinline_for_stack
870	char pointer(const char fmt, char buf, char end, void *ptr,	969	char pointer(const char fmt, char buf, char end, void *ptr,
871	struct printf_spec spec)	970	struct printf_spec spec)
872	{	971	{
		972	int default_width = 2 * sizeof(void *) + (spec.flags & SPECIAL ? 2 : 0);
		973
873	if (!ptr && *fmt != 'K') {	974	if (!ptr && *fmt != 'K') {
874	/*	975	/*
875	* Print (null) with the same width as a pointer so it makes	976	* Print (null) with the same width as a pointer so it makes
876	* tabular output look nice.	977	* tabular output look nice.
877	*/	978	*/
878	if (spec.field_width == -1)	979	if (spec.field_width == -1)
879	spec.field_width = 2 * sizeof(void *);	980	spec.field_width = default_width;
880	return string(buf, end, "(null)", spec);	981	return string(buf, end, "(null)", spec);
881	}	982	}
882		983
@@ -931,7 +1032,7 @@ char pointer(const char fmt, char buf, char end, void *ptr,
931	*/	1032	*/
932	if (in_irq() \|\| in_serving_softirq() \|\| in_nmi()) {	1033	if (in_irq() \|\| in_serving_softirq() \|\| in_nmi()) {
933	if (spec.field_width == -1)	1034	if (spec.field_width == -1)
934	spec.field_width = 2 * sizeof(void *);	1035	spec.field_width = default_width;
935	return string(buf, end, "pK-error", spec);	1036	return string(buf, end, "pK-error", spec);
936	}	1037	}
937	if (!((kptr_restrict == 0) \|\|	1038	if (!((kptr_restrict == 0) \|\|
@@ -948,7 +1049,7 @@ char pointer(const char fmt, char buf, char end, void *ptr,
948	}	1049	}
949	spec.flags \|= SMALL;	1050	spec.flags \|= SMALL;
950	if (spec.field_width == -1) {	1051	if (spec.field_width == -1) {
951	spec.field_width = 2 * sizeof(void *);	1052	spec.field_width = default_width;
952	spec.flags \|= ZEROPAD;	1053	spec.flags \|= ZEROPAD;
953	}	1054	}
954	spec.base = 16;	1055	spec.base = 16;