summary refs log tree commit diff stats
path: root/lib/vsprintf.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/vsprintf.c')
-rw-r--r-- lib/vsprintf.c 244
1 files changed, 126 insertions, 118 deletions
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 3a1e0843f9a2..da39c608a28c 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -33,6 +33,7 @@
33 33
34#include <asm/page.h> /* for PAGE_SIZE */ 34#include <asm/page.h> /* for PAGE_SIZE */
35#include <asm/sections.h> /* for dereference_function_descriptor() */ 35#include <asm/sections.h> /* for dereference_function_descriptor() */
36#include <asm/byteorder.h> /* cpu_to_le16 */
36 37
37#include <linux/string_helpers.h> 38#include <linux/string_helpers.h>
38#include "kstrtox.h" 39#include "kstrtox.h"
@@ -122,142 +123,145 @@ int skip_atoi(const char **s)
122 return i; 123 return i;
123} 124}
124 125
125/* Decimal conversion is by far the most typical, and is used 126/*
126 * for /proc and /sys data. This directly impacts e.g. top performance 127 * Decimal conversion is by far the most typical, and is used for
127 * with many processes running. We optimize it for speed 128 * /proc and /sys data. This directly impacts e.g. top performance
128 * using ideas described at <http://www.cs.uiowa.edu/~jones/bcd/divide.html> 129 * with many processes running. We optimize it for speed by emitting
129 * (with permission from the author, Douglas W. Jones). 130 * two characters at a time, using a 200 byte lookup table. This
131 * roughly halves the number of multiplications compared to computing
132 * the digits one at a time. Implementation strongly inspired by the
133 * previous version, which in turn used ideas described at
134 * <http://www.cs.uiowa.edu/~jones/bcd/divide.html> (with permission
135 * from the author, Douglas W. Jones).
136 *
137 * It turns out there is precisely one 26 bit fixed-point
138 * approximation a of 64/100 for which x/100 == (x * (u64)a) >> 32
139 * holds for all x in [0, 10^8-1], namely a = 0x28f5c29. The actual
140 * range happens to be somewhat larger (x <= 1073741898), but that's
141 * irrelevant for our purpose.
142 *
143 * For dividing a number in the range [10^4, 10^6-1] by 100, we still
144 * need a 32x32->64 bit multiply, so we simply use the same constant.
145 *
146 * For dividing a number in the range [100, 10^4-1] by 100, there are
147 * several options. The simplest is (x * 0x147b) >> 19, which is valid
148 * for all x <= 43698.
130 */ 149 */
131 150
132#if BITS_PER_LONG != 32 || BITS_PER_LONG_LONG != 64 151static const u16 decpair[100] = {
133/* Formats correctly any integer in [0, 999999999] */ 152#define _(x) (__force u16) cpu_to_le16(((x % 10) | ((x / 10) << 8)) + 0x3030)
153 _( 0), _( 1), _( 2), _( 3), _( 4), _( 5), _( 6), _( 7), _( 8), _( 9),
154 _(10), _(11), _(12), _(13), _(14), _(15), _(16), _(17), _(18), _(19),
155 _(20), _(21), _(22), _(23), _(24), _(25), _(26), _(27), _(28), _(29),
156 _(30), _(31), _(32), _(33), _(34), _(35), _(36), _(37), _(38), _(39),
157 _(40), _(41), _(42), _(43), _(44), _(45), _(46), _(47), _(48), _(49),
158 _(50), _(51), _(52), _(53), _(54), _(55), _(56), _(57), _(58), _(59),
159 _(60), _(61), _(62), _(63), _(64), _(65), _(66), _(67), _(68), _(69),
160 _(70), _(71), _(72), _(73), _(74), _(75), _(76), _(77), _(78), _(79),
161 _(80), _(81), _(82), _(83), _(84), _(85), _(86), _(87), _(88), _(89),
162 _(90), _(91), _(92), _(93), _(94), _(95), _(96), _(97), _(98), _(99),
163#undef _
164};
165
166/*
167 * This will print a single '0' even if r == 0, since we would
168 * immediately jump to out_r where two 0s would be written but only
169 * one of them accounted for in buf. This is needed by ip4_string
170 * below. All other callers pass a non-zero value of r.
171*/
134static noinline_for_stack 172static noinline_for_stack
135char *put_dec_full9(char *buf, unsigned q) 173char *put_dec_trunc8(char *buf, unsigned r)
136{ 174{
137 unsigned r; 175 unsigned q;
138 176
139 /* 177 /* 1 <= r < 10^8 */
140 * Possible ways to approx. divide by 10 178 if (r < 100)
141 * (x * 0x1999999a) >> 32 x < 1073741829 (multiply must be 64-bit) 179 goto out_r;
142 * (x * 0xcccd) >> 19 x < 81920 (x < 262149 when 64-bit mul) 180
143 * (x * 0x6667) >> 18 x < 43699 181 /* 100 <= r < 10^8 */
144 * (x * 0x3334) >> 17 x < 16389 182 q = (r * (u64)0x28f5c29) >> 32;
145 * (x * 0x199a) >> 16 x < 16389 183 *((u16 *)buf) = decpair[r - 100*q];
146 * (x * 0x0ccd) >> 15 x < 16389 184 buf += 2;
147 * (x * 0x0667) >> 14 x < 2739 185
148 * (x * 0x0334) >> 13 x < 1029 186 /* 1 <= q < 10^6 */
149 * (x * 0x019a) >> 12 x < 1029 187 if (q < 100)
150 * (x * 0x00cd) >> 11 x < 1029 shorter code than * 0x67 (on i386) 188 goto out_q;
151 * (x * 0x0067) >> 10 x < 179 189
152 * (x * 0x0034) >> 9 x < 69 same 190 /* 100 <= q < 10^6 */
153 * (x * 0x001a) >> 8 x < 69 same 191 r = (q * (u64)0x28f5c29) >> 32;
154 * (x * 0x000d) >> 7 x < 69 same, shortest code (on i386) 192 *((u16 *)buf) = decpair[q - 100*r];
155 * (x * 0x0007) >> 6 x < 19 193 buf += 2;
156 * See <http://www.cs.uiowa.edu/~jones/bcd/divide.html> 194
157 */ 195 /* 1 <= r < 10^4 */
158 r = (q * (uint64_t)0x1999999a) >> 32; 196 if (r < 100)
159 *buf++ = (q - 10 * r) + '0'; /* 1 */ 197 goto out_r;
160 q = (r * (uint64_t)0x1999999a) >> 32; 198
161 *buf++ = (r - 10 * q) + '0'; /* 2 */ 199 /* 100 <= r < 10^4 */
162 r = (q * (uint64_t)0x1999999a) >> 32; 200 q = (r * 0x147b) >> 19;
163 *buf++ = (q - 10 * r) + '0'; /* 3 */ 201 *((u16 *)buf) = decpair[r - 100*q];
164 q = (r * (uint64_t)0x1999999a) >> 32; 202 buf += 2;
165 *buf++ = (r - 10 * q) + '0'; /* 4 */ 203out_q:
166 r = (q * (uint64_t)0x1999999a) >> 32; 204 /* 1 <= q < 100 */
167 *buf++ = (q - 10 * r) + '0'; /* 5 */ 205 r = q;
168 /* Now value is under 10000, can avoid 64-bit multiply */ 206out_r:
169 q = (r * 0x199a) >> 16; 207 /* 1 <= r < 100 */
170 *buf++ = (r - 10 * q) + '0'; /* 6 */ 208 *((u16 *)buf) = decpair[r];
171 r = (q * 0xcd) >> 11; 209 buf += r < 10 ? 1 : 2;
172 *buf++ = (q - 10 * r) + '0'; /* 7 */
173 q = (r * 0xcd) >> 11;
174 *buf++ = (r - 10 * q) + '0'; /* 8 */
175 *buf++ = q + '0'; /* 9 */
176 return buf; 210 return buf;
177} 211}
178#endif
179 212
180/* Similar to above but do not pad with zeros. 213#if BITS_PER_LONG == 64 && BITS_PER_LONG_LONG == 64
181 * Code can be easily arranged to print 9 digits too, but our callers
182 * always call put_dec_full9() instead when the number has 9 decimal digits.
183 */
184static noinline_for_stack 214static noinline_for_stack
185char *put_dec_trunc8(char *buf, unsigned r) 215char *put_dec_full8(char *buf, unsigned r)
186{ 216{
187 unsigned q; 217 unsigned q;
188 218
189 /* Copy of previous function's body with added early returns */ 219 /* 0 <= r < 10^8 */
190 while (r >= 10000) { 220 q = (r * (u64)0x28f5c29) >> 32;
191 q = r + '0'; 221 *((u16 *)buf) = decpair[r - 100*q];
192 r = (r * (uint64_t)0x1999999a) >> 32; 222 buf += 2;
193 *buf++ = q - 10*r;
194 }
195 223
196 q = (r * 0x199a) >> 16; /* r <= 9999 */ 224 /* 0 <= q < 10^6 */
197 *buf++ = (r - 10 * q) + '0'; 225 r = (q * (u64)0x28f5c29) >> 32;
198 if (q == 0) 226 *((u16 *)buf) = decpair[q - 100*r];
199 return buf; 227 buf += 2;
200 r = (q * 0xcd) >> 11; /* q <= 999 */
201 *buf++ = (q - 10 * r) + '0';
202 if (r == 0)
203 return buf;
204 q = (r * 0xcd) >> 11; /* r <= 99 */
205 *buf++ = (r - 10 * q) + '0';
206 if (q == 0)
207 return buf;
208 *buf++ = q + '0'; /* q <= 9 */
209 return buf;
210}
211 228
212/* There are two algorithms to print larger numbers. 229 /* 0 <= r < 10^4 */
213 * One is generic: divide by 1000000000 and repeatedly print 230 q = (r * 0x147b) >> 19;
214 * groups of (up to) 9 digits. It's conceptually simple, 231 *((u16 *)buf) = decpair[r - 100*q];
215 * but requires a (unsigned long long) / 1000000000 division. 232 buf += 2;
216 *
217 * Second algorithm splits 64-bit unsigned long long into 16-bit chunks,
218 * manipulates them cleverly and generates groups of 4 decimal digits.
219 * It so happens that it does NOT require long long division.
220 *
221 * If long is > 32 bits, division of 64-bit values is relatively easy,
222 * and we will use the first algorithm.
223 * If long long is > 64 bits (strange architecture with VERY large long long),
224 * second algorithm can't be used, and we again use the first one.
225 *
226 * Else (if long is 32 bits and long long is 64 bits) we use second one.
227 */
228 233
229#if BITS_PER_LONG != 32 || BITS_PER_LONG_LONG != 64 234 /* 0 <= q < 100 */
230 235 *((u16 *)buf) = decpair[q];
231/* First algorithm: generic */ 236 buf += 2;
237 return buf;
238}
232 239
233static 240static noinline_for_stack
234char *put_dec(char *buf, unsigned long long n) 241char *put_dec(char *buf, unsigned long long n)
235{ 242{
236 if (n >= 100*1000*1000) { 243 if (n >= 100*1000*1000)
237 while (n >= 1000*1000*1000) 244 buf = put_dec_full8(buf, do_div(n, 100*1000*1000));
238 buf = put_dec_full9(buf, do_div(n, 1000*1000*1000)); 245 /* 1 <= n <= 1.6e11 */
239 if (n >= 100*1000*1000) 246 if (n >= 100*1000*1000)
240 return put_dec_full9(buf, n); 247 buf = put_dec_full8(buf, do_div(n, 100*1000*1000));
241 } 248 /* 1 <= n < 1e8 */
242 return put_dec_trunc8(buf, n); 249 return put_dec_trunc8(buf, n);
243} 250}
244 251
245#else 252#elif BITS_PER_LONG == 32 && BITS_PER_LONG_LONG == 64
246 253
247/* Second algorithm: valid only for 64-bit long longs */ 254static void
248 255put_dec_full4(char *buf, unsigned r)
249/* See comment in put_dec_full9 for choice of constants */
250static noinline_for_stack
251void put_dec_full4(char *buf, unsigned q)
252{ 256{
253 unsigned r; 257 unsigned q;
254 r = (q * 0xccd) >> 15; 258
255 buf[0] = (q - 10 * r) + '0'; 259 /* 0 <= r < 10^4 */
256 q = (r * 0xcd) >> 11; 260 q = (r * 0x147b) >> 19;
257 buf[1] = (r - 10 * q) + '0'; 261 *((u16 *)buf) = decpair[r - 100*q];
258 r = (q * 0xcd) >> 11; 262 buf += 2;
259 buf[2] = (q - 10 * r) + '0'; 263 /* 0 <= q < 100 */
260 buf[3] = r + '0'; 264 *((u16 *)buf) = decpair[q];
261} 265}
262 266
263/* 267/*
@@ -265,9 +269,9 @@ void put_dec_full4(char *buf, unsigned q)
265 * The approximation x/10000 == (x * 0x346DC5D7) >> 43 269 * The approximation x/10000 == (x * 0x346DC5D7) >> 43
266 * holds for all x < 1,128,869,999. The largest value this 270 * holds for all x < 1,128,869,999. The largest value this
267 * helper will ever be asked to convert is 1,125,520,955. 271 * helper will ever be asked to convert is 1,125,520,955.
268 * (d1 in the put_dec code, assuming n is all-ones). 272 * (second call in the put_dec code, assuming n is all-ones).
269 */ 273 */
270static 274static noinline_for_stack
271unsigned put_dec_helper4(char *buf, unsigned x) 275unsigned put_dec_helper4(char *buf, unsigned x)
272{ 276{
273 uint32_t q = (x * (uint64_t)0x346DC5D7) >> 43; 277 uint32_t q = (x * (uint64_t)0x346DC5D7) >> 43;
@@ -294,6 +298,8 @@ char *put_dec(char *buf, unsigned long long n)
294 d2 = (h ) & 0xffff; 298 d2 = (h ) & 0xffff;
295 d3 = (h >> 16); /* implicit "& 0xffff" */ 299 d3 = (h >> 16); /* implicit "& 0xffff" */
296 300
301 /* n = 2^48 d3 + 2^32 d2 + 2^16 d1 + d0
302 = 281_4749_7671_0656 d3 + 42_9496_7296 d2 + 6_5536 d1 + d0 */
297 q = 656 * d3 + 7296 * d2 + 5536 * d1 + ((uint32_t)n & 0xffff); 303 q = 656 * d3 + 7296 * d2 + 5536 * d1 + ((uint32_t)n & 0xffff);
298 q = put_dec_helper4(buf, q); 304 q = put_dec_helper4(buf, q);
299 305
@@ -323,7 +329,8 @@ char *put_dec(char *buf, unsigned long long n)
323 */ 329 */
324int num_to_str(char *buf, int size, unsigned long long num) 330int num_to_str(char *buf, int size, unsigned long long num)
325{ 331{
326 char tmp[sizeof(num) * 3]; 332 /* put_dec requires 2-byte alignment of the buffer. */
333 char tmp[sizeof(num) * 3] __aligned(2);
327 int idx, len; 334 int idx, len;
328 335
329 /* put_dec() may work incorrectly for num = 0 (generate "", not "0") */ 336 /* put_dec() may work incorrectly for num = 0 (generate "", not "0") */
@@ -384,7 +391,8 @@ static noinline_for_stack
384char *number(char *buf, char *end, unsigned long long num, 391char *number(char *buf, char *end, unsigned long long num,
385 struct printf_spec spec) 392 struct printf_spec spec)
386{ 393{
387 char tmp[3 * sizeof(num)]; 394 /* put_dec requires 2-byte alignment of the buffer. */
395 char tmp[3 * sizeof(num)] __aligned(2);
388 char sign; 396 char sign;
389 char locase; 397 char locase;
390 int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10); 398 int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10);
@@ -944,7 +952,7 @@ char *ip4_string(char *p, const u8 *addr, const char *fmt)
944 break; 952 break;
945 } 953 }
946 for (i = 0; i < 4; i++) { 954 for (i = 0; i < 4; i++) {
947 char temp[3]; /* hold each IP quad in reverse order */ 955 char temp[4] __aligned(2); /* hold each IP quad in reverse order */
948 int digits = put_dec_trunc8(temp, addr[index]) - temp; 956 int digits = put_dec_trunc8(temp, addr[index]) - temp;
949 if (leading_zeros) { 957 if (leading_zeros) {
950 if (digits < 3) 958 if (digits < 3)