diff options
author | Peter Jones <pjones@redhat.com> | 2016-02-08 14:48:11 -0500 |
---|---|---|
committer | Matt Fleming <matt@codeblueprint.co.uk> | 2016-02-10 08:19:03 -0500 |
commit | 73500267c930baadadb0d02284909731baf151f7 (patch) | |
tree | 431f07c5786db95d33f0f6d557cdba1881da29e3 | |
parent | 59fd1214561921343305a0e9dc218bf3d40068f3 (diff) |
lib/ucs2_string: Add ucs2 -> utf8 helper functions
This adds ucs2_utf8size(), which tells us how many bytes our ucs2 string
occupies once encoded as utf8, and ucs2_as_utf8(), which translates from
ucs2 to utf8.
Signed-off-by: Peter Jones <pjones@redhat.com>
Tested-by: Lee, Chun-Yi <jlee@suse.com>
Acked-by: Matthew Garrett <mjg59@coreos.com>
Signed-off-by: Matt Fleming <matt@codeblueprint.co.uk>
-rw-r--r-- | include/linux/ucs2_string.h | 4 | ||||
-rw-r--r-- | lib/ucs2_string.c | 62 |
2 files changed, 66 insertions, 0 deletions
diff --git a/include/linux/ucs2_string.h b/include/linux/ucs2_string.h index cbb20afdbc01..bb679b48f408 100644 --- a/include/linux/ucs2_string.h +++ b/include/linux/ucs2_string.h | |||
@@ -11,4 +11,8 @@ unsigned long ucs2_strlen(const ucs2_char_t *s); | |||
11 | unsigned long ucs2_strsize(const ucs2_char_t *data, unsigned long maxlength); | 11 | unsigned long ucs2_strsize(const ucs2_char_t *data, unsigned long maxlength); |
12 | int ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len); | 12 | int ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len); |
13 | 13 | ||
14 | unsigned long ucs2_utf8size(const ucs2_char_t *src); | ||
15 | unsigned long ucs2_as_utf8(u8 *dest, const ucs2_char_t *src, | ||
16 | unsigned long maxlength); | ||
17 | |||
14 | #endif /* _LINUX_UCS2_STRING_H_ */ | 18 | #endif /* _LINUX_UCS2_STRING_H_ */ |
diff --git a/lib/ucs2_string.c b/lib/ucs2_string.c index 6f500ef2301d..17dd74e21ef9 100644 --- a/lib/ucs2_string.c +++ b/lib/ucs2_string.c | |||
@@ -49,3 +49,65 @@ ucs2_strncmp(const ucs2_char_t *a, const ucs2_char_t *b, size_t len) | |||
49 | } | 49 | } |
50 | } | 50 | } |
51 | EXPORT_SYMBOL(ucs2_strncmp); | 51 | EXPORT_SYMBOL(ucs2_strncmp); |
52 | |||
53 | unsigned long | ||
54 | ucs2_utf8size(const ucs2_char_t *src) | ||
55 | { | ||
56 | unsigned long i; | ||
57 | unsigned long j = 0; | ||
58 | |||
59 | for (i = 0; i < ucs2_strlen(src); i++) { | ||
60 | u16 c = src[i]; | ||
61 | |||
62 | if (c > 0x800) | ||
63 | j += 3; | ||
64 | else if (c > 0x80) | ||
65 | j += 2; | ||
66 | else | ||
67 | j += 1; | ||
68 | } | ||
69 | |||
70 | return j; | ||
71 | } | ||
72 | EXPORT_SYMBOL(ucs2_utf8size); | ||
73 | |||
74 | /* | ||
75 | * copy at most maxlength bytes of whole utf8 characters to dest from the | ||
76 | * ucs2 string src. | ||
77 | * | ||
78 | * The return value is the number of characters copied, not including the | ||
79 | * final NUL character. | ||
80 | */ | ||
81 | unsigned long | ||
82 | ucs2_as_utf8(u8 *dest, const ucs2_char_t *src, unsigned long maxlength) | ||
83 | { | ||
84 | unsigned int i; | ||
85 | unsigned long j = 0; | ||
86 | unsigned long limit = ucs2_strnlen(src, maxlength); | ||
87 | |||
88 | for (i = 0; maxlength && i < limit; i++) { | ||
89 | u16 c = src[i]; | ||
90 | |||
91 | if (c > 0x800) { | ||
92 | if (maxlength < 3) | ||
93 | break; | ||
94 | maxlength -= 3; | ||
95 | dest[j++] = 0xe0 | (c & 0xf000) >> 12; | ||
96 | dest[j++] = 0x80 | (c & 0x0fc0) >> 8; | ||
97 | dest[j++] = 0x80 | (c & 0x003f); | ||
98 | } else if (c > 0x80) { | ||
99 | if (maxlength < 2) | ||
100 | break; | ||
101 | maxlength -= 2; | ||
102 | dest[j++] = 0xc0 | (c & 0xfe0) >> 5; | ||
103 | dest[j++] = 0x80 | (c & 0x01f); | ||
104 | } else { | ||
105 | maxlength -= 1; | ||
106 | dest[j++] = c & 0x7f; | ||
107 | } | ||
108 | } | ||
109 | if (maxlength) | ||
110 | dest[j] = '\0'; | ||
111 | return j; | ||
112 | } | ||
113 | EXPORT_SYMBOL(ucs2_as_utf8); | ||