diff options
Diffstat (limited to 'lib/vsprintf.c')
| -rw-r--r-- | lib/vsprintf.c | 289 |
1 files changed, 195 insertions, 94 deletions
diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 5391299c1e78..c3f36d415bdf 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c | |||
| @@ -112,106 +112,199 @@ int skip_atoi(const char **s) | |||
| 112 | /* Decimal conversion is by far the most typical, and is used | 112 | /* Decimal conversion is by far the most typical, and is used |
| 113 | * for /proc and /sys data. This directly impacts e.g. top performance | 113 | * for /proc and /sys data. This directly impacts e.g. top performance |
| 114 | * with many processes running. We optimize it for speed | 114 | * with many processes running. We optimize it for speed |
| 115 | * using code from | 115 | * using ideas described at <http://www.cs.uiowa.edu/~jones/bcd/divide.html> |
| 116 | * http://www.cs.uiowa.edu/~jones/bcd/decimal.html | 116 | * (with permission from the author, Douglas W. Jones). |
| 117 | * (with permission from the author, Douglas W. Jones). */ | 117 | */ |
| 118 | 118 | ||
| 119 | /* Formats correctly any integer in [0,99999]. | 119 | #if BITS_PER_LONG != 32 || BITS_PER_LONG_LONG != 64 |
| 120 | * Outputs from one to five digits depending on input. | 120 | /* Formats correctly any integer in [0, 999999999] */ |
| 121 | * On i386 gcc 4.1.2 -O2: ~250 bytes of code. */ | ||
| 122 | static noinline_for_stack | 121 | static noinline_for_stack |
| 123 | char *put_dec_trunc(char *buf, unsigned q) | 122 | char *put_dec_full9(char *buf, unsigned q) |
| 124 | { | 123 | { |
| 125 | unsigned d3, d2, d1, d0; | 124 | unsigned r; |
| 126 | d1 = (q>>4) & 0xf; | ||
| 127 | d2 = (q>>8) & 0xf; | ||
| 128 | d3 = (q>>12); | ||
| 129 | |||
| 130 | d0 = 6*(d3 + d2 + d1) + (q & 0xf); | ||
| 131 | q = (d0 * 0xcd) >> 11; | ||
| 132 | d0 = d0 - 10*q; | ||
| 133 | *buf++ = d0 + '0'; /* least significant digit */ | ||
| 134 | d1 = q + 9*d3 + 5*d2 + d1; | ||
| 135 | if (d1 != 0) { | ||
| 136 | q = (d1 * 0xcd) >> 11; | ||
| 137 | d1 = d1 - 10*q; | ||
| 138 | *buf++ = d1 + '0'; /* next digit */ | ||
| 139 | |||
| 140 | d2 = q + 2*d2; | ||
| 141 | if ((d2 != 0) || (d3 != 0)) { | ||
| 142 | q = (d2 * 0xd) >> 7; | ||
| 143 | d2 = d2 - 10*q; | ||
| 144 | *buf++ = d2 + '0'; /* next digit */ | ||
| 145 | |||
| 146 | d3 = q + 4*d3; | ||
| 147 | if (d3 != 0) { | ||
| 148 | q = (d3 * 0xcd) >> 11; | ||
| 149 | d3 = d3 - 10*q; | ||
| 150 | *buf++ = d3 + '0'; /* next digit */ | ||
| 151 | if (q != 0) | ||
| 152 | *buf++ = q + '0'; /* most sign. digit */ | ||
| 153 | } | ||
| 154 | } | ||
| 155 | } | ||
| 156 | 125 | ||
| 126 | /* | ||
| 127 | * Possible ways to approx. divide by 10 | ||
| 128 | * (x * 0x1999999a) >> 32 x < 1073741829 (multiply must be 64-bit) | ||
| 129 | * (x * 0xcccd) >> 19 x < 81920 (x < 262149 when 64-bit mul) | ||
| 130 | * (x * 0x6667) >> 18 x < 43699 | ||
| 131 | * (x * 0x3334) >> 17 x < 16389 | ||
| 132 | * (x * 0x199a) >> 16 x < 16389 | ||
| 133 | * (x * 0x0ccd) >> 15 x < 16389 | ||
| 134 | * (x * 0x0667) >> 14 x < 2739 | ||
| 135 | * (x * 0x0334) >> 13 x < 1029 | ||
| 136 | * (x * 0x019a) >> 12 x < 1029 | ||
| 137 | * (x * 0x00cd) >> 11 x < 1029 shorter code than * 0x67 (on i386) | ||
| 138 | * (x * 0x0067) >> 10 x < 179 | ||
| 139 | * (x * 0x0034) >> 9 x < 69 same | ||
| 140 | * (x * 0x001a) >> 8 x < 69 same | ||
| 141 | * (x * 0x000d) >> 7 x < 69 same, shortest code (on i386) | ||
| 142 | * (x * 0x0007) >> 6 x < 19 | ||
| 143 | * See <http://www.cs.uiowa.edu/~jones/bcd/divide.html> | ||
| 144 | */ | ||
| 145 | r = (q * (uint64_t)0x1999999a) >> 32; | ||
| 146 | *buf++ = (q - 10 * r) + '0'; /* 1 */ | ||
| 147 | q = (r * (uint64_t)0x1999999a) >> 32; | ||
| 148 | *buf++ = (r - 10 * q) + '0'; /* 2 */ | ||
| 149 | r = (q * (uint64_t)0x1999999a) >> 32; | ||
| 150 | *buf++ = (q - 10 * r) + '0'; /* 3 */ | ||
| 151 | q = (r * (uint64_t)0x1999999a) >> 32; | ||
| 152 | *buf++ = (r - 10 * q) + '0'; /* 4 */ | ||
| 153 | r = (q * (uint64_t)0x1999999a) >> 32; | ||
| 154 | *buf++ = (q - 10 * r) + '0'; /* 5 */ | ||
| 155 | /* Now value is under 10000, can avoid 64-bit multiply */ | ||
| 156 | q = (r * 0x199a) >> 16; | ||
| 157 | *buf++ = (r - 10 * q) + '0'; /* 6 */ | ||
| 158 | r = (q * 0xcd) >> 11; | ||
| 159 | *buf++ = (q - 10 * r) + '0'; /* 7 */ | ||
| 160 | q = (r * 0xcd) >> 11; | ||
| 161 | *buf++ = (r - 10 * q) + '0'; /* 8 */ | ||
| 162 | *buf++ = q + '0'; /* 9 */ | ||
| 157 | return buf; | 163 | return buf; |
| 158 | } | 164 | } |
| 159 | /* Same with if's removed. Always emits five digits */ | 165 | #endif |
| 166 | |||
| 167 | /* Similar to above but do not pad with zeros. | ||
| 168 | * Code can be easily arranged to print 9 digits too, but our callers | ||
| 169 | * always call put_dec_full9() instead when the number has 9 decimal digits. | ||
| 170 | */ | ||
| 160 | static noinline_for_stack | 171 | static noinline_for_stack |
| 161 | char *put_dec_full(char *buf, unsigned q) | 172 | char *put_dec_trunc8(char *buf, unsigned r) |
| 162 | { | 173 | { |
| 163 | /* BTW, if q is in [0,9999], 8-bit ints will be enough, */ | 174 | unsigned q; |
| 164 | /* but anyway, gcc produces better code with full-sized ints */ | 175 | |
| 165 | unsigned d3, d2, d1, d0; | 176 | /* Copy of previous function's body with added early returns */ |
| 166 | d1 = (q>>4) & 0xf; | 177 | q = (r * (uint64_t)0x1999999a) >> 32; |
| 167 | d2 = (q>>8) & 0xf; | 178 | *buf++ = (r - 10 * q) + '0'; /* 2 */ |
| 168 | d3 = (q>>12); | 179 | if (q == 0) |
| 180 | return buf; | ||
| 181 | r = (q * (uint64_t)0x1999999a) >> 32; | ||
| 182 | *buf++ = (q - 10 * r) + '0'; /* 3 */ | ||
| 183 | if (r == 0) | ||
| 184 | return buf; | ||
| 185 | q = (r * (uint64_t)0x1999999a) >> 32; | ||
| 186 | *buf++ = (r - 10 * q) + '0'; /* 4 */ | ||
| 187 | if (q == 0) | ||
| 188 | return buf; | ||
| 189 | r = (q * (uint64_t)0x1999999a) >> 32; | ||
| 190 | *buf++ = (q - 10 * r) + '0'; /* 5 */ | ||
| 191 | if (r == 0) | ||
| 192 | return buf; | ||
| 193 | q = (r * 0x199a) >> 16; | ||
| 194 | *buf++ = (r - 10 * q) + '0'; /* 6 */ | ||
| 195 | if (q == 0) | ||
| 196 | return buf; | ||
| 197 | r = (q * 0xcd) >> 11; | ||
| 198 | *buf++ = (q - 10 * r) + '0'; /* 7 */ | ||
| 199 | if (r == 0) | ||
| 200 | return buf; | ||
| 201 | q = (r * 0xcd) >> 11; | ||
| 202 | *buf++ = (r - 10 * q) + '0'; /* 8 */ | ||
| 203 | if (q == 0) | ||
| 204 | return buf; | ||
| 205 | *buf++ = q + '0'; /* 9 */ | ||
| 206 | return buf; | ||
| 207 | } | ||
| 169 | 208 | ||
| 170 | /* | 209 | /* There are two algorithms to print larger numbers. |
| 171 | * Possible ways to approx. divide by 10 | 210 | * One is generic: divide by 1000000000 and repeatedly print |
| 172 | * gcc -O2 replaces multiply with shifts and adds | 211 | * groups of (up to) 9 digits. It's conceptually simple, |
| 173 | * (x * 0xcd) >> 11: 11001101 - shorter code than * 0x67 (on i386) | 212 | * but requires a (unsigned long long) / 1000000000 division. |
| 174 | * (x * 0x67) >> 10: 1100111 | 213 | * |
| 175 | * (x * 0x34) >> 9: 110100 - same | 214 | * Second algorithm splits 64-bit unsigned long long into 16-bit chunks, |
| 176 | * (x * 0x1a) >> 8: 11010 - same | 215 | * manipulates them cleverly and generates groups of 4 decimal digits. |
| 177 | * (x * 0x0d) >> 7: 1101 - same, shortest code (on i386) | 216 | * It so happens that it does NOT require long long division. |
| 178 | */ | 217 | * |
| 179 | d0 = 6*(d3 + d2 + d1) + (q & 0xf); | 218 | * If long is > 32 bits, division of 64-bit values is relatively easy, |
| 180 | q = (d0 * 0xcd) >> 11; | 219 | * and we will use the first algorithm. |
| 181 | d0 = d0 - 10*q; | 220 | * If long long is > 64 bits (strange architecture with VERY large long long), |
| 182 | *buf++ = d0 + '0'; | 221 | * second algorithm can't be used, and we again use the first one. |
| 183 | d1 = q + 9*d3 + 5*d2 + d1; | 222 | * |
| 184 | q = (d1 * 0xcd) >> 11; | 223 | * Else (if long is 32 bits and long long is 64 bits) we use second one. |
| 185 | d1 = d1 - 10*q; | 224 | */ |
| 186 | *buf++ = d1 + '0'; | ||
| 187 | |||
| 188 | d2 = q + 2*d2; | ||
| 189 | q = (d2 * 0xd) >> 7; | ||
| 190 | d2 = d2 - 10*q; | ||
| 191 | *buf++ = d2 + '0'; | ||
| 192 | |||
| 193 | d3 = q + 4*d3; | ||
| 194 | q = (d3 * 0xcd) >> 11; /* - shorter code */ | ||
| 195 | /* q = (d3 * 0x67) >> 10; - would also work */ | ||
| 196 | d3 = d3 - 10*q; | ||
| 197 | *buf++ = d3 + '0'; | ||
| 198 | *buf++ = q + '0'; | ||
| 199 | 225 | ||
| 200 | return buf; | 226 | #if BITS_PER_LONG != 32 || BITS_PER_LONG_LONG != 64 |
| 227 | |||
| 228 | /* First algorithm: generic */ | ||
| 229 | |||
| 230 | static | ||
| 231 | char *put_dec(char *buf, unsigned long long n) | ||
| 232 | { | ||
| 233 | if (n >= 100*1000*1000) { | ||
| 234 | while (n >= 1000*1000*1000) | ||
| 235 | buf = put_dec_full9(buf, do_div(n, 1000*1000*1000)); | ||
| 236 | if (n >= 100*1000*1000) | ||
| 237 | return put_dec_full9(buf, n); | ||
| 238 | } | ||
| 239 | return put_dec_trunc8(buf, n); | ||
| 201 | } | 240 | } |
| 202 | /* No inlining helps gcc to use registers better */ | 241 | |
| 242 | #else | ||
| 243 | |||
| 244 | /* Second algorithm: valid only for 64-bit long longs */ | ||
| 245 | |||
| 203 | static noinline_for_stack | 246 | static noinline_for_stack |
| 204 | char *put_dec(char *buf, unsigned long long num) | 247 | char *put_dec_full4(char *buf, unsigned q) |
| 205 | { | 248 | { |
| 206 | while (1) { | 249 | unsigned r; |
| 207 | unsigned rem; | 250 | r = (q * 0xcccd) >> 19; |
| 208 | if (num < 100000) | 251 | *buf++ = (q - 10 * r) + '0'; |
| 209 | return put_dec_trunc(buf, num); | 252 | q = (r * 0x199a) >> 16; |
| 210 | rem = do_div(num, 100000); | 253 | *buf++ = (r - 10 * q) + '0'; |
| 211 | buf = put_dec_full(buf, rem); | 254 | r = (q * 0xcd) >> 11; |
| 212 | } | 255 | *buf++ = (q - 10 * r) + '0'; |
| 256 | *buf++ = r + '0'; | ||
| 257 | return buf; | ||
| 213 | } | 258 | } |
| 214 | 259 | ||
| 260 | /* Based on code by Douglas W. Jones found at | ||
| 261 | * <http://www.cs.uiowa.edu/~jones/bcd/decimal.html#sixtyfour> | ||
| 262 | * (with permission from the author). | ||
| 263 | * Performs no 64-bit division and hence should be fast on 32-bit machines. | ||
| 264 | */ | ||
| 265 | static | ||
| 266 | char *put_dec(char *buf, unsigned long long n) | ||
| 267 | { | ||
| 268 | uint32_t d3, d2, d1, q, h; | ||
| 269 | |||
| 270 | if (n < 100*1000*1000) | ||
| 271 | return put_dec_trunc8(buf, n); | ||
| 272 | |||
| 273 | d1 = ((uint32_t)n >> 16); /* implicit "& 0xffff" */ | ||
| 274 | h = (n >> 32); | ||
| 275 | d2 = (h ) & 0xffff; | ||
| 276 | d3 = (h >> 16); /* implicit "& 0xffff" */ | ||
| 277 | |||
| 278 | q = 656 * d3 + 7296 * d2 + 5536 * d1 + ((uint32_t)n & 0xffff); | ||
| 279 | |||
| 280 | buf = put_dec_full4(buf, q % 10000); | ||
| 281 | q = q / 10000; | ||
| 282 | |||
| 283 | d1 = q + 7671 * d3 + 9496 * d2 + 6 * d1; | ||
| 284 | buf = put_dec_full4(buf, d1 % 10000); | ||
| 285 | q = d1 / 10000; | ||
| 286 | |||
| 287 | d2 = q + 4749 * d3 + 42 * d2; | ||
| 288 | buf = put_dec_full4(buf, d2 % 10000); | ||
| 289 | q = d2 / 10000; | ||
| 290 | |||
| 291 | d3 = q + 281 * d3; | ||
| 292 | if (!d3) | ||
| 293 | goto done; | ||
| 294 | buf = put_dec_full4(buf, d3 % 10000); | ||
| 295 | q = d3 / 10000; | ||
| 296 | if (!q) | ||
| 297 | goto done; | ||
| 298 | buf = put_dec_full4(buf, q); | ||
| 299 | done: | ||
| 300 | while (buf[-1] == '0') | ||
| 301 | --buf; | ||
| 302 | |||
| 303 | return buf; | ||
| 304 | } | ||
| 305 | |||
| 306 | #endif | ||
| 307 | |||
| 215 | /* | 308 | /* |
| 216 | * Convert passed number to decimal string. | 309 | * Convert passed number to decimal string. |
| 217 | * Returns the length of string. On buffer overflow, returns 0. | 310 | * Returns the length of string. On buffer overflow, returns 0. |
| @@ -220,16 +313,22 @@ char *put_dec(char *buf, unsigned long long num) | |||
| 220 | */ | 313 | */ |
| 221 | int num_to_str(char *buf, int size, unsigned long long num) | 314 | int num_to_str(char *buf, int size, unsigned long long num) |
| 222 | { | 315 | { |
| 223 | char tmp[21]; /* Enough for 2^64 in decimal */ | 316 | char tmp[sizeof(num) * 3]; |
| 224 | int idx, len; | 317 | int idx, len; |
| 225 | 318 | ||
| 226 | len = put_dec(tmp, num) - tmp; | 319 | /* put_dec() may work incorrectly for num = 0 (generate "", not "0") */ |
| 320 | if (num <= 9) { | ||
| 321 | tmp[0] = '0' + num; | ||
| 322 | len = 1; | ||
| 323 | } else { | ||
| 324 | len = put_dec(tmp, num) - tmp; | ||
| 325 | } | ||
| 227 | 326 | ||
| 228 | if (len > size) | 327 | if (len > size) |
| 229 | return 0; | 328 | return 0; |
| 230 | for (idx = 0; idx < len; ++idx) | 329 | for (idx = 0; idx < len; ++idx) |
| 231 | buf[idx] = tmp[len - idx - 1]; | 330 | buf[idx] = tmp[len - idx - 1]; |
| 232 | return len; | 331 | return len; |
| 233 | } | 332 | } |
| 234 | 333 | ||
| 235 | #define ZEROPAD 1 /* pad with zero */ | 334 | #define ZEROPAD 1 /* pad with zero */ |
| @@ -314,8 +413,8 @@ char *number(char *buf, char *end, unsigned long long num, | |||
| 314 | 413 | ||
| 315 | /* generate full string in tmp[], in reverse order */ | 414 | /* generate full string in tmp[], in reverse order */ |
| 316 | i = 0; | 415 | i = 0; |
| 317 | if (num == 0) | 416 | if (num < spec.base) |
| 318 | tmp[i++] = '0'; | 417 | tmp[i++] = digits[num] | locase; |
| 319 | /* Generic code, for any base: | 418 | /* Generic code, for any base: |
| 320 | else do { | 419 | else do { |
| 321 | tmp[i++] = (digits[do_div(num,base)] | locase); | 420 | tmp[i++] = (digits[do_div(num,base)] | locase); |
| @@ -611,7 +710,7 @@ char *ip4_string(char *p, const u8 *addr, const char *fmt) | |||
| 611 | } | 710 | } |
| 612 | for (i = 0; i < 4; i++) { | 711 | for (i = 0; i < 4; i++) { |
| 613 | char temp[3]; /* hold each IP quad in reverse order */ | 712 | char temp[3]; /* hold each IP quad in reverse order */ |
| 614 | int digits = put_dec_trunc(temp, addr[index]) - temp; | 713 | int digits = put_dec_trunc8(temp, addr[index]) - temp; |
| 615 | if (leading_zeros) { | 714 | if (leading_zeros) { |
| 616 | if (digits < 3) | 715 | if (digits < 3) |
| 617 | *p++ = '0'; | 716 | *p++ = '0'; |
| @@ -870,13 +969,15 @@ static noinline_for_stack | |||
| 870 | char *pointer(const char *fmt, char *buf, char *end, void *ptr, | 969 | char *pointer(const char *fmt, char *buf, char *end, void *ptr, |
| 871 | struct printf_spec spec) | 970 | struct printf_spec spec) |
| 872 | { | 971 | { |
| 972 | int default_width = 2 * sizeof(void *) + (spec.flags & SPECIAL ? 2 : 0); | ||
| 973 | |||
| 873 | if (!ptr && *fmt != 'K') { | 974 | if (!ptr && *fmt != 'K') { |
| 874 | /* | 975 | /* |
| 875 | * Print (null) with the same width as a pointer so it makes | 976 | * Print (null) with the same width as a pointer so it makes |
| 876 | * tabular output look nice. | 977 | * tabular output look nice. |
| 877 | */ | 978 | */ |
| 878 | if (spec.field_width == -1) | 979 | if (spec.field_width == -1) |
| 879 | spec.field_width = 2 * sizeof(void *); | 980 | spec.field_width = default_width; |
| 880 | return string(buf, end, "(null)", spec); | 981 | return string(buf, end, "(null)", spec); |
| 881 | } | 982 | } |
| 882 | 983 | ||
| @@ -931,7 +1032,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, | |||
| 931 | */ | 1032 | */ |
| 932 | if (in_irq() || in_serving_softirq() || in_nmi()) { | 1033 | if (in_irq() || in_serving_softirq() || in_nmi()) { |
| 933 | if (spec.field_width == -1) | 1034 | if (spec.field_width == -1) |
| 934 | spec.field_width = 2 * sizeof(void *); | 1035 | spec.field_width = default_width; |
| 935 | return string(buf, end, "pK-error", spec); | 1036 | return string(buf, end, "pK-error", spec); |
| 936 | } | 1037 | } |
| 937 | if (!((kptr_restrict == 0) || | 1038 | if (!((kptr_restrict == 0) || |
| @@ -948,7 +1049,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, | |||
| 948 | } | 1049 | } |
| 949 | spec.flags |= SMALL; | 1050 | spec.flags |= SMALL; |
| 950 | if (spec.field_width == -1) { | 1051 | if (spec.field_width == -1) { |
| 951 | spec.field_width = 2 * sizeof(void *); | 1052 | spec.field_width = default_width; |
| 952 | spec.flags |= ZEROPAD; | 1053 | spec.flags |= ZEROPAD; |
| 953 | } | 1054 | } |
| 954 | spec.base = 16; | 1055 | spec.base = 16; |
