1 files changed, 105 insertions, 3 deletions
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index e94b4bd25bc5..6b6734df6d2d 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -135,6 +135,103 @@ static int skip_atoi(const char **s)
        return i;
 }
+/* Decimal conversion is by far the most typical, and is used
+ * for /proc and /sys data. This directly impacts e.g. top performance
+ * with many processes running. We optimize it for speed
+ * using code from
+ * http://www.cs.uiowa.edu/~jones/bcd/decimal.html
+ * (with permission from the author, Douglas W. Jones). */
+/* Formats correctly any integer in [0,99999].
+ * Outputs from one to five digits depending on input.
+ * Digits are stored least significant first; the return value points
+ * just past the last digit written.
+ *
+ * q is viewed as d3*4096 + d2*256 + d1*16 + (q & 0xf), where d1 and d2
+ * are 4-bit fields and d3 is everything above bit 11 (not masked).
+ * Since 4096 = 4*1000 + 9*10 + 6, 256 = 2*100 + 5*10 + 6 and
+ * 16 = 1*10 + 6, each decimal column is a small weighted sum of these
+ * fields plus the carry from the column below. Each column sum is split
+ * into digit and carry with a multiply-and-shift approximation of
+ * division by 10.
+ * On i386 gcc 4.1.2 -O2: ~250 bytes of code. */
+static char* put_dec_trunc(char *buf, unsigned q)
+{
+        unsigned d3, d2, d1, d0;
+        d1 = (q>>4) & 0xf; /* bits 4..7 */
+        d2 = (q>>8) & 0xf; /* bits 8..11 */
+        d3 = (q>>12); /* bits 12 and up */
+        d0 = 6*(d3 + d2 + d1) + (q & 0xf); /* ones column sum */
+        q = (d0 * 0xcd) >> 11; /* from here on q is reused as the carry */
+        d0 = d0 - 10*q; /* what is left is the digit */
+        *buf++ = d0 + '0'; /* least significant digit */
+        d1 = q + 9*d3 + 5*d2 + d1; /* tens column sum, including carry */
+        if (d1 != 0) { /* zero only if the carry and all upper fields are zero */
+                q = (d1 * 0xcd) >> 11;
+                d1 = d1 - 10*q;
+                *buf++ = d1 + '0'; /* next digit */
+                d2 = q + 2*d2; /* hundreds column sum, including carry */
+                if ((d2 != 0) || (d3 != 0)) { /* d3 still feeds the thousands column */
+                        q = (d2 * 0xd) >> 7; /* another approximation of d2 / 10 */
+                        d2 = d2 - 10*q;
+                        *buf++ = d2 + '0'; /* next digit */
+                        d3 = q + 4*d3; /* thousands column sum, including carry */
+                        if (d3 != 0) {
+                                q = (d3 * 0xcd) >> 11;
+                                d3 = d3 - 10*q;
+                                *buf++ = d3 + '0';  /* next digit */
+                                if (q != 0) /* final carry is the fifth digit */
+                                        *buf++ = q + '0';  /* most sign. digit */
+                        }
+                }
+        }
+        return buf;
+}
+/* Same with if's removed. Always emits five digits,
+ * least significant first, including leading zeros of the value.
+ * Returns a pointer just past the fifth digit written.
+ * The nested indentation below is left over from the conditional
+ * version, put_dec_trunc. */
+static char* put_dec_full(char *buf, unsigned q)
+{
+        /* BTW, if q is in [0,9999], 8-bit ints will be enough, */
+        /* but anyway, gcc produces better code with full-sized ints */
+        unsigned d3, d2, d1, d0;
+        d1 = (q>>4) & 0xf; /* bits 4..7 */
+        d2 = (q>>8) & 0xf; /* bits 8..11 */
+        d3 = (q>>12); /* bits 12 and up, not masked */
+        /* Possible ways to approx. divide by 10 */
+        /* gcc -O2 replaces multiply with shifts and adds */
+        // (x * 0xcd) >> 11: 11001101 - shorter code than * 0x67 (on i386)
+        // (x * 0x67) >> 10:  1100111
+        // (x * 0x34) >> 9:    110100 - same
+        // (x * 0x1a) >> 8:     11010 - same
+        // (x * 0x0d) >> 7:      1101 - same, shortest code (on i386)
+        d0 = 6*(d3 + d2 + d1) + (q & 0xf); /* ones column sum */
+        q = (d0 * 0xcd) >> 11; /* from here on q is reused as the carry */
+        d0 = d0 - 10*q; /* what is left is the digit */
+        *buf++ = d0 + '0'; /* ones digit */
+        d1 = q + 9*d3 + 5*d2 + d1; /* tens column sum, including carry */
+                q = (d1 * 0xcd) >> 11;
+                d1 = d1 - 10*q;
+                *buf++ = d1 + '0'; /* tens digit */
+                d2 = q + 2*d2; /* hundreds column sum, including carry */
+                        q = (d2 * 0xd) >> 7;
+                        d2 = d2 - 10*q;
+                        *buf++ = d2 + '0'; /* hundreds digit */
+                        d3 = q + 4*d3; /* thousands column sum, including carry */
+                                q = (d3 * 0xcd) >> 11; /* - shorter code */
+                                /* q = (d3 * 0x67) >> 10; - would also work */
+                                d3 = d3 - 10*q;
+                                *buf++ = d3 + '0'; /* thousands digit */
+                                        *buf++ = q + '0'; /* final carry: ten-thousands digit */
+        return buf;
+}
+/* Writes the decimal digits of num into buf, least significant digit
+ * first, and returns a pointer just past the last digit written.
+ * The value is consumed in chunks of five decimal digits: every chunk
+ * except the last goes through put_dec_full (always five digits), and
+ * the final chunk, which is below 100000, goes through put_dec_trunc
+ * (one to five digits).
+ * do_div is defined elsewhere; from its use here it presumably divides
+ * num in place and yields the remainder - confirm against its definition.
+ * No inlining helps gcc to use registers better */
+static noinline char* put_dec(char *buf, unsigned long long num)
+{
+        while (1) {
+                unsigned rem;
+                if (num < 100000) /* last chunk: fits put_dec_trunc's range */
+                        return put_dec_trunc(buf, num);
+                rem = do_div(num, 100000); /* rem: the five digits emitted next */
+                buf = put_dec_full(buf, rem);
+        }
+}
 #define ZEROPAD 1               /* pad with zero */
 #define SIGN    2               /* unsigned/signed long */
 #define PLUS    4               /* show plus */
@@ -182,6 +279,11 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
        i = 0;
        if (num == 0)
                tmp[i++] = '0';
+        /* Generic code, for any base:
+        else do {
+                tmp[i++] = digits[do_div(num,base)];
+        } while (num != 0);
+        */
        else if (base != 10) { /* 8 or 16 */
                int mask = base - 1;
                int shift = 3;
@@ -190,9 +292,9 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
                        tmp[i++] = digits[((unsigned char)num) & mask];
                        num >>= shift;
                } while (num);
-        } else do { /* generic code, works for any base */
+        } else { /* base 10 */
-                tmp[i++] = digits[do_div(num,10 /*base*/)];
+                i = put_dec(tmp, num) - tmp;
-        } while (num);
+        }
        /* printing 100 using %2d gives "100", not "00" */
        if (i > precision)

diff --git a/lib/vsprintf.c b/lib/vsprintf.c index e94b4bd25bc5..6b6734df6d2d 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c
@@ -135,6 +135,103 @@ static int skip_atoi(const char **s)
135	return i;	135	return i;
136	}	136	}
137		137
		138	/* Decimal conversion is by far the most typical, and is used
		139	* for /proc and /sys data. This directly impacts e.g. top performance
		140	* with many processes running. We optimize it for speed
		141	* using code from
		142	* http://www.cs.uiowa.edu/~jones/bcd/decimal.html
		143	* (with permission from the author, Douglas W. Jones). */
		144
		145	/* Formats correctly any integer in [0,99999].
		146	* Outputs from one to five digits depending on input.
		147	* On i386 gcc 4.1.2 -O2: ~250 bytes of code. */
		148	static char* put_dec_trunc(char *buf, unsigned q)
		149	{
		150	unsigned d3, d2, d1, d0;
		151	d1 = (q>>4) & 0xf;
		152	d2 = (q>>8) & 0xf;
		153	d3 = (q>>12);
		154
		155	d0 = 6*(d3 + d2 + d1) + (q & 0xf);
		156	q = (d0 * 0xcd) >> 11;
		157	d0 = d0 - 10*q;
		158	*buf++ = d0 + '0'; /* least significant digit */
		159	d1 = q + 9*d3 + 5*d2 + d1;
		160	if (d1 != 0) {
		161	q = (d1 * 0xcd) >> 11;
		162	d1 = d1 - 10*q;
		163	*buf++ = d1 + '0'; /* next digit */
		164
		165	d2 = q + 2*d2;
		166	if ((d2 != 0) || (d3 != 0)) {
		167	q = (d2 * 0xd) >> 7;
		168	d2 = d2 - 10*q;
		169	*buf++ = d2 + '0'; /* next digit */
		170
		171	d3 = q + 4*d3;
		172	if (d3 != 0) {
		173	q = (d3 * 0xcd) >> 11;
		174	d3 = d3 - 10*q;
		175	*buf++ = d3 + '0'; /* next digit */
		176	if (q != 0)
		177	*buf++ = q + '0'; /* most sign. digit */
		178	}
		179	}
		180	}
		181	return buf;
		182	}
		183	/* Same with if's removed. Always emits five digits */
		184	static char* put_dec_full(char *buf, unsigned q)
		185	{
		186	/* BTW, if q is in [0,9999], 8-bit ints will be enough, */
		187	/* but anyway, gcc produces better code with full-sized ints */
		188	unsigned d3, d2, d1, d0;
		189	d1 = (q>>4) & 0xf;
		190	d2 = (q>>8) & 0xf;
		191	d3 = (q>>12);
		192
		193	/* Possible ways to approx. divide by 10 */
		194	/* gcc -O2 replaces multiply with shifts and adds */
		195	// (x * 0xcd) >> 11: 11001101 - shorter code than * 0x67 (on i386)
		196	// (x * 0x67) >> 10: 1100111
		197	// (x * 0x34) >> 9: 110100 - same
		198	// (x * 0x1a) >> 8: 11010 - same
		199	// (x * 0x0d) >> 7: 1101 - same, shortest code (on i386)
		200
		201	d0 = 6*(d3 + d2 + d1) + (q & 0xf);
		202	q = (d0 * 0xcd) >> 11;
		203	d0 = d0 - 10*q;
		204	*buf++ = d0 + '0';
		205	d1 = q + 9*d3 + 5*d2 + d1;
		206	q = (d1 * 0xcd) >> 11;
		207	d1 = d1 - 10*q;
		208	*buf++ = d1 + '0';
		209
		210	d2 = q + 2*d2;
		211	q = (d2 * 0xd) >> 7;
		212	d2 = d2 - 10*q;
		213	*buf++ = d2 + '0';
		214
		215	d3 = q + 4*d3;
		216	q = (d3 * 0xcd) >> 11; /* - shorter code */
		217	/* q = (d3 * 0x67) >> 10; - would also work */
		218	d3 = d3 - 10*q;
		219	*buf++ = d3 + '0';
		220	*buf++ = q + '0';
		221	return buf;
		222	}
		223	/* No inlining helps gcc to use registers better */
		224	static noinline char* put_dec(char *buf, unsigned long long num)
		225	{
		226	while (1) {
		227	unsigned rem;
		228	if (num < 100000)
		229	return put_dec_trunc(buf, num);
		230	rem = do_div(num, 100000);
		231	buf = put_dec_full(buf, rem);
		232	}
		233	}
		234
138	#define ZEROPAD 1 /* pad with zero */	235	#define ZEROPAD 1 /* pad with zero */
139	#define SIGN 2 /* unsigned/signed long */	236	#define SIGN 2 /* unsigned/signed long */
140	#define PLUS 4 /* show plus */	237	#define PLUS 4 /* show plus */
@@ -182,6 +279,11 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
182	i = 0;	279	i = 0;
183	if (num == 0)	280	if (num == 0)
184	tmp[i++] = '0';	281	tmp[i++] = '0';
		282	/* Generic code, for any base:
		283	else do {
		284	tmp[i++] = digits[do_div(num,base)];
		285	} while (num != 0);
		286	*/
185	else if (base != 10) { /* 8 or 16 */	287	else if (base != 10) { /* 8 or 16 */
186	int mask = base - 1;	288	int mask = base - 1;
187	int shift = 3;	289	int shift = 3;
@@ -190,9 +292,9 @@ static char *number(char *buf, char *end, unsigned long long num, int base, int
190	tmp[i++] = digits[((unsigned char)num) & mask];	292	tmp[i++] = digits[((unsigned char)num) & mask];
191	num >>= shift;	293	num >>= shift;
192	} while (num);	294	} while (num);
193	} else do { /* generic code, works for any base */	295	} else { /* base 10 */
194	tmp[i++] = digits[do_div(num,10 /*base*/)];	296	i = put_dec(tmp, num) - tmp;
195	} while (num);	297	}
196		298
197	/* printing 100 using %2d gives "100", not "00" */	299	/* printing 100 using %2d gives "100", not "00" */
198	if (i > precision)	300	if (i > precision)