diff options
Diffstat (limited to 'fs/udf/unicode.c')
-rw-r--r-- | fs/udf/unicode.c | 260 |
1 files changed, 126 insertions, 134 deletions
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index 16a8ad21b77e..45234791fec2 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c | |||
@@ -28,101 +28,64 @@ | |||
28 | 28 | ||
29 | #include "udf_sb.h" | 29 | #include "udf_sb.h" |
30 | 30 | ||
31 | #define PLANE_SIZE 0x10000 | ||
32 | #define UNICODE_MAX 0x10ffff | ||
31 | #define SURROGATE_MASK 0xfffff800 | 33 | #define SURROGATE_MASK 0xfffff800 |
32 | #define SURROGATE_PAIR 0x0000d800 | 34 | #define SURROGATE_PAIR 0x0000d800 |
35 | #define SURROGATE_LOW 0x00000400 | ||
36 | #define SURROGATE_CHAR_BITS 10 | ||
37 | #define SURROGATE_CHAR_MASK ((1 << SURROGATE_CHAR_BITS) - 1) | ||
33 | 38 | ||
34 | static int udf_uni2char_utf8(wchar_t uni, | 39 | #define ILLEGAL_CHAR_MARK '_' |
35 | unsigned char *out, | 40 | #define EXT_MARK '.' |
36 | int boundlen) | 41 | #define CRC_MARK '#' |
37 | { | 42 | #define EXT_SIZE 5 |
38 | int u_len = 0; | 43 | /* Number of chars we need to store generated CRC to make filename unique */ |
39 | 44 | #define CRC_LEN 5 | |
40 | if (boundlen <= 0) | ||
41 | return -ENAMETOOLONG; | ||
42 | 45 | ||
43 | if ((uni & SURROGATE_MASK) == SURROGATE_PAIR) | 46 | static unicode_t get_utf16_char(const uint8_t *str_i, int str_i_max_len, |
44 | return -EINVAL; | 47 | int str_i_idx, int u_ch, unicode_t *ret) |
48 | { | ||
49 | unicode_t c; | ||
50 | int start_idx = str_i_idx; | ||
51 | |||
52 | /* Expand OSTA compressed Unicode to Unicode */ | ||
53 | c = str_i[str_i_idx++]; | ||
54 | if (u_ch > 1) | ||
55 | c = (c << 8) | str_i[str_i_idx++]; | ||
56 | if ((c & SURROGATE_MASK) == SURROGATE_PAIR) { | ||
57 | unicode_t next; | ||
58 | |||
59 | /* Trailing surrogate char */ | ||
60 | if (str_i_idx >= str_i_max_len) { | ||
61 | c = UNICODE_MAX + 1; | ||
62 | goto out; | ||
63 | } | ||
45 | 64 | ||
46 | if (uni < 0x80) { | 65 | /* Low surrogate must follow the high one... */ |
47 | out[u_len++] = (unsigned char)uni; | 66 | if (c & SURROGATE_LOW) { |
48 | } else if (uni < 0x800) { | 67 | c = UNICODE_MAX + 1; |
49 | if (boundlen < 2) | 68 | goto out; |
50 | return -ENAMETOOLONG; | 69 | } |
51 | out[u_len++] = (unsigned char)(0xc0 | (uni >> 6)); | ||
52 | out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f)); | ||
53 | } else { | ||
54 | if (boundlen < 3) | ||
55 | return -ENAMETOOLONG; | ||
56 | out[u_len++] = (unsigned char)(0xe0 | (uni >> 12)); | ||
57 | out[u_len++] = (unsigned char)(0x80 | ((uni >> 6) & 0x3f)); | ||
58 | out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f)); | ||
59 | } | ||
60 | return u_len; | ||
61 | } | ||
62 | 70 | ||
63 | static int udf_char2uni_utf8(const unsigned char *in, | 71 | WARN_ON_ONCE(u_ch != 2); |
64 | int boundlen, | 72 | next = str_i[str_i_idx++] << 8; |
65 | wchar_t *uni) | 73 | next |= str_i[str_i_idx++]; |
66 | { | 74 | if ((next & SURROGATE_MASK) != SURROGATE_PAIR || |
67 | unsigned int utf_char; | 75 | !(next & SURROGATE_LOW)) { |
68 | unsigned char c; | 76 | c = UNICODE_MAX + 1; |
69 | int utf_cnt, u_len; | 77 | goto out; |
70 | |||
71 | utf_char = 0; | ||
72 | utf_cnt = 0; | ||
73 | for (u_len = 0; u_len < boundlen;) { | ||
74 | c = in[u_len++]; | ||
75 | |||
76 | /* Complete a multi-byte UTF-8 character */ | ||
77 | if (utf_cnt) { | ||
78 | utf_char = (utf_char << 6) | (c & 0x3f); | ||
79 | if (--utf_cnt) | ||
80 | continue; | ||
81 | } else { | ||
82 | /* Check for a multi-byte UTF-8 character */ | ||
83 | if (c & 0x80) { | ||
84 | /* Start a multi-byte UTF-8 character */ | ||
85 | if ((c & 0xe0) == 0xc0) { | ||
86 | utf_char = c & 0x1f; | ||
87 | utf_cnt = 1; | ||
88 | } else if ((c & 0xf0) == 0xe0) { | ||
89 | utf_char = c & 0x0f; | ||
90 | utf_cnt = 2; | ||
91 | } else if ((c & 0xf8) == 0xf0) { | ||
92 | utf_char = c & 0x07; | ||
93 | utf_cnt = 3; | ||
94 | } else if ((c & 0xfc) == 0xf8) { | ||
95 | utf_char = c & 0x03; | ||
96 | utf_cnt = 4; | ||
97 | } else if ((c & 0xfe) == 0xfc) { | ||
98 | utf_char = c & 0x01; | ||
99 | utf_cnt = 5; | ||
100 | } else { | ||
101 | utf_cnt = -1; | ||
102 | break; | ||
103 | } | ||
104 | continue; | ||
105 | } else { | ||
106 | /* Single byte UTF-8 character (most common) */ | ||
107 | utf_char = c; | ||
108 | } | ||
109 | } | 78 | } |
110 | *uni = utf_char; | 79 | |
111 | break; | 80 | c = PLANE_SIZE + |
112 | } | 81 | ((c & SURROGATE_CHAR_MASK) << SURROGATE_CHAR_BITS) + |
113 | if (utf_cnt) { | 82 | (next & SURROGATE_CHAR_MASK); |
114 | *uni = '?'; | ||
115 | return -EINVAL; | ||
116 | } | 83 | } |
117 | return u_len; | 84 | out: |
85 | *ret = c; | ||
86 | return str_i_idx - start_idx; | ||
118 | } | 87 | } |
119 | 88 | ||
120 | #define ILLEGAL_CHAR_MARK '_' | ||
121 | #define EXT_MARK '.' | ||
122 | #define CRC_MARK '#' | ||
123 | #define EXT_SIZE 5 | ||
124 | /* Number of chars we need to store generated CRC to make filename unique */ | ||
125 | #define CRC_LEN 5 | ||
126 | 89 | ||
127 | static int udf_name_conv_char(uint8_t *str_o, int str_o_max_len, | 90 | static int udf_name_conv_char(uint8_t *str_o, int str_o_max_len, |
128 | int *str_o_idx, | 91 | int *str_o_idx, |
@@ -132,27 +95,29 @@ static int udf_name_conv_char(uint8_t *str_o, int str_o_max_len, | |||
132 | int (*conv_f)(wchar_t, unsigned char *, int), | 95 | int (*conv_f)(wchar_t, unsigned char *, int), |
133 | int translate) | 96 | int translate) |
134 | { | 97 | { |
135 | uint32_t c; | 98 | unicode_t c; |
136 | int illChar = 0; | 99 | int illChar = 0; |
137 | int len, gotch = 0; | 100 | int len, gotch = 0; |
138 | 101 | ||
139 | for (; (!gotch) && (*str_i_idx < str_i_max_len); *str_i_idx += u_ch) { | 102 | while (!gotch && *str_i_idx < str_i_max_len) { |
140 | if (*str_o_idx >= str_o_max_len) { | 103 | if (*str_o_idx >= str_o_max_len) { |
141 | *needsCRC = 1; | 104 | *needsCRC = 1; |
142 | return gotch; | 105 | return gotch; |
143 | } | 106 | } |
144 | 107 | ||
145 | /* Expand OSTA compressed Unicode to Unicode */ | 108 | len = get_utf16_char(str_i, str_i_max_len, *str_i_idx, u_ch, |
146 | c = str_i[*str_i_idx]; | 109 | &c); |
147 | if (u_ch > 1) | 110 | /* These chars cannot be converted. Replace them. */ |
148 | c = (c << 8) | str_i[*str_i_idx + 1]; | 111 | if (c == 0 || c > UNICODE_MAX || (conv_f && c > MAX_WCHAR_T) || |
149 | 112 | (translate && c == '/')) { | |
150 | if (translate && (c == '/' || c == 0)) | ||
151 | illChar = 1; | 113 | illChar = 1; |
152 | else if (illChar) | 114 | if (!translate) |
115 | gotch = 1; | ||
116 | } else if (illChar) | ||
153 | break; | 117 | break; |
154 | else | 118 | else |
155 | gotch = 1; | 119 | gotch = 1; |
120 | *str_i_idx += len; | ||
156 | } | 121 | } |
157 | if (illChar) { | 122 | if (illChar) { |
158 | *needsCRC = 1; | 123 | *needsCRC = 1; |
@@ -160,7 +125,15 @@ static int udf_name_conv_char(uint8_t *str_o, int str_o_max_len, | |||
160 | gotch = 1; | 125 | gotch = 1; |
161 | } | 126 | } |
162 | if (gotch) { | 127 | if (gotch) { |
163 | len = conv_f(c, &str_o[*str_o_idx], str_o_max_len - *str_o_idx); | 128 | if (conv_f) { |
129 | len = conv_f(c, &str_o[*str_o_idx], | ||
130 | str_o_max_len - *str_o_idx); | ||
131 | } else { | ||
132 | len = utf32_to_utf8(c, &str_o[*str_o_idx], | ||
133 | str_o_max_len - *str_o_idx); | ||
134 | if (len < 0) | ||
135 | len = -ENAMETOOLONG; | ||
136 | } | ||
164 | /* Valid character? */ | 137 | /* Valid character? */ |
165 | if (len >= 0) | 138 | if (len >= 0) |
166 | *str_o_idx += len; | 139 | *str_o_idx += len; |
@@ -168,16 +141,16 @@ static int udf_name_conv_char(uint8_t *str_o, int str_o_max_len, | |||
168 | *needsCRC = 1; | 141 | *needsCRC = 1; |
169 | gotch = 0; | 142 | gotch = 0; |
170 | } else { | 143 | } else { |
171 | str_o[(*str_o_idx)++] = '?'; | 144 | str_o[(*str_o_idx)++] = ILLEGAL_CHAR_MARK; |
172 | *needsCRC = 1; | 145 | *needsCRC = 1; |
173 | } | 146 | } |
174 | } | 147 | } |
175 | return gotch; | 148 | return gotch; |
176 | } | 149 | } |
177 | 150 | ||
178 | static int udf_name_from_CS0(uint8_t *str_o, int str_max_len, | 151 | static int udf_name_from_CS0(struct super_block *sb, |
152 | uint8_t *str_o, int str_max_len, | ||
179 | const uint8_t *ocu, int ocu_len, | 153 | const uint8_t *ocu, int ocu_len, |
180 | int (*conv_f)(wchar_t, unsigned char *, int), | ||
181 | int translate) | 154 | int translate) |
182 | { | 155 | { |
183 | uint32_t c; | 156 | uint32_t c; |
@@ -194,6 +167,7 @@ static int udf_name_from_CS0(uint8_t *str_o, int str_max_len, | |||
194 | unsigned short valueCRC; | 167 | unsigned short valueCRC; |
195 | uint8_t ext[EXT_SIZE * NLS_MAX_CHARSET_SIZE + 1]; | 168 | uint8_t ext[EXT_SIZE * NLS_MAX_CHARSET_SIZE + 1]; |
196 | uint8_t crc[CRC_LEN]; | 169 | uint8_t crc[CRC_LEN]; |
170 | int (*conv_f)(wchar_t, unsigned char *, int); | ||
197 | 171 | ||
198 | if (str_max_len <= 0) | 172 | if (str_max_len <= 0) |
199 | return 0; | 173 | return 0; |
@@ -203,6 +177,11 @@ static int udf_name_from_CS0(uint8_t *str_o, int str_max_len, | |||
203 | return 0; | 177 | return 0; |
204 | } | 178 | } |
205 | 179 | ||
180 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) | ||
181 | conv_f = UDF_SB(sb)->s_nls_map->uni2char; | ||
182 | else | ||
183 | conv_f = NULL; | ||
184 | |||
206 | cmp_id = ocu[0]; | 185 | cmp_id = ocu[0]; |
207 | if (cmp_id != 8 && cmp_id != 16) { | 186 | if (cmp_id != 8 && cmp_id != 16) { |
208 | memset(str_o, 0, str_max_len); | 187 | memset(str_o, 0, str_max_len); |
@@ -293,18 +272,24 @@ static int udf_name_from_CS0(uint8_t *str_o, int str_max_len, | |||
293 | return str_o_len; | 272 | return str_o_len; |
294 | } | 273 | } |
295 | 274 | ||
296 | static int udf_name_to_CS0(uint8_t *ocu, int ocu_max_len, | 275 | static int udf_name_to_CS0(struct super_block *sb, |
297 | const uint8_t *str_i, int str_len, | 276 | uint8_t *ocu, int ocu_max_len, |
298 | int (*conv_f)(const unsigned char *, int, wchar_t *)) | 277 | const uint8_t *str_i, int str_len) |
299 | { | 278 | { |
300 | int i, len; | 279 | int i, len; |
301 | unsigned int max_val; | 280 | unsigned int max_val; |
302 | wchar_t uni_char; | ||
303 | int u_len, u_ch; | 281 | int u_len, u_ch; |
282 | unicode_t uni_char; | ||
283 | int (*conv_f)(const unsigned char *, int, wchar_t *); | ||
304 | 284 | ||
305 | if (ocu_max_len <= 0) | 285 | if (ocu_max_len <= 0) |
306 | return 0; | 286 | return 0; |
307 | 287 | ||
288 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) | ||
289 | conv_f = UDF_SB(sb)->s_nls_map->char2uni; | ||
290 | else | ||
291 | conv_f = NULL; | ||
292 | |||
308 | memset(ocu, 0, ocu_max_len); | 293 | memset(ocu, 0, ocu_max_len); |
309 | ocu[0] = 8; | 294 | ocu[0] = 8; |
310 | max_val = 0xff; | 295 | max_val = 0xff; |
@@ -312,36 +297,61 @@ static int udf_name_to_CS0(uint8_t *ocu, int ocu_max_len, | |||
312 | 297 | ||
313 | try_again: | 298 | try_again: |
314 | u_len = 1; | 299 | u_len = 1; |
315 | for (i = 0; i < str_len; i++) { | 300 | for (i = 0; i < str_len; i += len) { |
316 | /* Name didn't fit? */ | 301 | /* Name didn't fit? */ |
317 | if (u_len + u_ch > ocu_max_len) | 302 | if (u_len + u_ch > ocu_max_len) |
318 | return 0; | 303 | return 0; |
319 | len = conv_f(&str_i[i], str_len - i, &uni_char); | 304 | if (conv_f) { |
320 | if (!len) | 305 | wchar_t wchar; |
321 | continue; | 306 | |
307 | len = conv_f(&str_i[i], str_len - i, &wchar); | ||
308 | if (len > 0) | ||
309 | uni_char = wchar; | ||
310 | } else { | ||
311 | len = utf8_to_utf32(&str_i[i], str_len - i, | ||
312 | &uni_char); | ||
313 | } | ||
322 | /* Invalid character, deal with it */ | 314 | /* Invalid character, deal with it */ |
323 | if (len < 0) { | 315 | if (len <= 0 || uni_char > UNICODE_MAX) { |
324 | len = 1; | 316 | len = 1; |
325 | uni_char = '?'; | 317 | uni_char = '?'; |
326 | } | 318 | } |
327 | 319 | ||
328 | if (uni_char > max_val) { | 320 | if (uni_char > max_val) { |
329 | max_val = 0xffff; | 321 | unicode_t c; |
330 | ocu[0] = 0x10; | 322 | |
331 | u_ch = 2; | 323 | if (max_val == 0xff) { |
332 | goto try_again; | 324 | max_val = 0xffff; |
325 | ocu[0] = 0x10; | ||
326 | u_ch = 2; | ||
327 | goto try_again; | ||
328 | } | ||
329 | /* | ||
330 | * Use UTF-16 encoding for chars outside we | ||
331 | * cannot encode directly. | ||
332 | */ | ||
333 | if (u_len + 2 * u_ch > ocu_max_len) | ||
334 | return 0; | ||
335 | |||
336 | uni_char -= PLANE_SIZE; | ||
337 | c = SURROGATE_PAIR | | ||
338 | ((uni_char >> SURROGATE_CHAR_BITS) & | ||
339 | SURROGATE_CHAR_MASK); | ||
340 | ocu[u_len++] = (uint8_t)(c >> 8); | ||
341 | ocu[u_len++] = (uint8_t)(c & 0xff); | ||
342 | uni_char = SURROGATE_PAIR | SURROGATE_LOW | | ||
343 | (uni_char & SURROGATE_CHAR_MASK); | ||
333 | } | 344 | } |
334 | 345 | ||
335 | if (max_val == 0xffff) | 346 | if (max_val == 0xffff) |
336 | ocu[u_len++] = (uint8_t)(uni_char >> 8); | 347 | ocu[u_len++] = (uint8_t)(uni_char >> 8); |
337 | ocu[u_len++] = (uint8_t)(uni_char & 0xff); | 348 | ocu[u_len++] = (uint8_t)(uni_char & 0xff); |
338 | i += len - 1; | ||
339 | } | 349 | } |
340 | 350 | ||
341 | return u_len; | 351 | return u_len; |
342 | } | 352 | } |
343 | 353 | ||
344 | int udf_dstrCS0toUTF8(uint8_t *utf_o, int o_len, | 354 | int udf_dstrCS0toChar(struct super_block *sb, uint8_t *utf_o, int o_len, |
345 | const uint8_t *ocu_i, int i_len) | 355 | const uint8_t *ocu_i, int i_len) |
346 | { | 356 | { |
347 | int s_len = 0; | 357 | int s_len = 0; |
@@ -355,14 +365,12 @@ int udf_dstrCS0toUTF8(uint8_t *utf_o, int o_len, | |||
355 | } | 365 | } |
356 | } | 366 | } |
357 | 367 | ||
358 | return udf_name_from_CS0(utf_o, o_len, ocu_i, s_len, | 368 | return udf_name_from_CS0(sb, utf_o, o_len, ocu_i, s_len, 0); |
359 | udf_uni2char_utf8, 0); | ||
360 | } | 369 | } |
361 | 370 | ||
362 | int udf_get_filename(struct super_block *sb, const uint8_t *sname, int slen, | 371 | int udf_get_filename(struct super_block *sb, const uint8_t *sname, int slen, |
363 | uint8_t *dname, int dlen) | 372 | uint8_t *dname, int dlen) |
364 | { | 373 | { |
365 | int (*conv_f)(wchar_t, unsigned char *, int); | ||
366 | int ret; | 374 | int ret; |
367 | 375 | ||
368 | if (!slen) | 376 | if (!slen) |
@@ -371,14 +379,7 @@ int udf_get_filename(struct super_block *sb, const uint8_t *sname, int slen, | |||
371 | if (dlen <= 0) | 379 | if (dlen <= 0) |
372 | return 0; | 380 | return 0; |
373 | 381 | ||
374 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { | 382 | ret = udf_name_from_CS0(sb, dname, dlen, sname, slen, 1); |
375 | conv_f = udf_uni2char_utf8; | ||
376 | } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { | ||
377 | conv_f = UDF_SB(sb)->s_nls_map->uni2char; | ||
378 | } else | ||
379 | BUG(); | ||
380 | |||
381 | ret = udf_name_from_CS0(dname, dlen, sname, slen, conv_f, 1); | ||
382 | /* Zero length filename isn't valid... */ | 383 | /* Zero length filename isn't valid... */ |
383 | if (ret == 0) | 384 | if (ret == 0) |
384 | ret = -EINVAL; | 385 | ret = -EINVAL; |
@@ -388,15 +389,6 @@ int udf_get_filename(struct super_block *sb, const uint8_t *sname, int slen, | |||
388 | int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen, | 389 | int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen, |
389 | uint8_t *dname, int dlen) | 390 | uint8_t *dname, int dlen) |
390 | { | 391 | { |
391 | int (*conv_f)(const unsigned char *, int, wchar_t *); | 392 | return udf_name_to_CS0(sb, dname, dlen, sname, slen); |
392 | |||
393 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { | ||
394 | conv_f = udf_char2uni_utf8; | ||
395 | } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { | ||
396 | conv_f = UDF_SB(sb)->s_nls_map->char2uni; | ||
397 | } else | ||
398 | BUG(); | ||
399 | |||
400 | return udf_name_to_CS0(dname, dlen, sname, slen, conv_f); | ||
401 | } | 393 | } |
402 | 394 | ||