about summary refs log tree commit diff stats
path: root/fs/udf
diff options
context:
space:
mode:
author: Andrew Gabbasov <andrew_gabbasov@mentor.com> 2016-01-15 03:44:20 -0500
committer: Jan Kara <jack@suse.cz> 2016-02-09 07:05:23 -0500
commit: 3e7fc2055c931b1c27a9834a753611c879492a34 (patch)
tree: 1aa5ec536fa68872d1f62dbd98b98a2eaa37ba5c /fs/udf
parent: 525e2c56c341cb8b31bbe1694f0582077f454969 (diff)
udf: Join functions for UTF8 and NLS conversions
There is not much sense in having separate functions for UTF8 and NLS conversions, since UTF8 encoding is actually a special case of NLS. However, although UTF8 is also supported by the general NLS framework, it would be good to have separate UTF8 character conversion functions (char2uni and uni2char) locally in the UDF code, so that they can be used even if NLS support is not enabled in the kernel configuration. Signed-off-by: Andrew Gabbasov <andrew_gabbasov@mentor.com> Signed-off-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs/udf')
-rw-r--r-- fs/udf/unicode.c 278
1 files changed, 90 insertions, 188 deletions
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index 47e61883275d..4d7a674ebce5 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -76,151 +76,72 @@ static void udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize)
76 memcpy(dest->u_name, ptr + 1, exactsize - 1); 76 memcpy(dest->u_name, ptr + 1, exactsize - 1);
77} 77}
78 78
79/* 79static int udf_uni2char_utf8(wchar_t uni,
80 * udf_CS0toUTF8 80 unsigned char *out,
81 * 81 int boundlen)
82 * PURPOSE
83 * Convert OSTA Compressed Unicode to the UTF-8 equivalent.
84 *
85 * PRE-CONDITIONS
86 * utf Pointer to UTF-8 output buffer.
87 * ocu Pointer to OSTA Compressed Unicode input buffer
88 * of size UDF_NAME_LEN bytes.
89 * both of type "struct ustr *"
90 *
91 * POST-CONDITIONS
92 * <return> >= 0 on success.
93 *
94 * HISTORY
95 * November 12, 1997 - Andrew E. Mileski
96 * Written, tested, and released.
97 */
98int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i)
99{ 82{
100 const uint8_t *ocu; 83 int u_len = 0;
101 uint8_t cmp_id, ocu_len; 84
102 int i; 85 if (boundlen <= 0)
103 86 return -ENAMETOOLONG;
104 ocu_len = ocu_i->u_len; 87
105 if (ocu_len == 0) { 88 if (uni < 0x80) {
106 memset(utf_o, 0, sizeof(struct ustr)); 89 out[u_len++] = (unsigned char)uni;
107 return 0; 90 } else if (uni < 0x800) {
108 } 91 if (boundlen < 2)
109 92 return -ENAMETOOLONG;
110 cmp_id = ocu_i->u_cmpID; 93 out[u_len++] = (unsigned char)(0xc0 | (uni >> 6));
111 if (cmp_id != 8 && cmp_id != 16) { 94 out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
112 memset(utf_o, 0, sizeof(struct ustr)); 95 } else {
113 pr_err("unknown compression code (%d) stri=%s\n", 96 if (boundlen < 3)
114 cmp_id, ocu_i->u_name); 97 return -ENAMETOOLONG;
115 return -EINVAL; 98 out[u_len++] = (unsigned char)(0xe0 | (uni >> 12));
116 } 99 out[u_len++] = (unsigned char)(0x80 | ((uni >> 6) & 0x3f));
117 100 out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
118 ocu = ocu_i->u_name;
119 utf_o->u_len = 0;
120 for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) {
121
122 /* Expand OSTA compressed Unicode to Unicode */
123 uint32_t c = ocu[i++];
124 if (cmp_id == 16)
125 c = (c << 8) | ocu[i++];
126
127 /* Compress Unicode to UTF-8 */
128 if (c < 0x80U)
129 utf_o->u_name[utf_o->u_len++] = (uint8_t)c;
130 else if (c < 0x800U) {
131 if (utf_o->u_len > (UDF_NAME_LEN - 4))
132 break;
133 utf_o->u_name[utf_o->u_len++] =
134 (uint8_t)(0xc0 | (c >> 6));
135 utf_o->u_name[utf_o->u_len++] =
136 (uint8_t)(0x80 | (c & 0x3f));
137 } else {
138 if (utf_o->u_len > (UDF_NAME_LEN - 5))
139 break;
140 utf_o->u_name[utf_o->u_len++] =
141 (uint8_t)(0xe0 | (c >> 12));
142 utf_o->u_name[utf_o->u_len++] =
143 (uint8_t)(0x80 |
144 ((c >> 6) & 0x3f));
145 utf_o->u_name[utf_o->u_len++] =
146 (uint8_t)(0x80 | (c & 0x3f));
147 }
148 } 101 }
149 utf_o->u_cmpID = 8; 102 return u_len;
150
151 return utf_o->u_len;
152} 103}
153 104
154/* 105static int udf_char2uni_utf8(const unsigned char *in,
155 * 106 int boundlen,
156 * udf_UTF8toCS0 107 wchar_t *uni)
157 *
158 * PURPOSE
159 * Convert UTF-8 to the OSTA Compressed Unicode equivalent.
160 *
161 * DESCRIPTION
162 * This routine is only called by udf_lookup().
163 *
164 * PRE-CONDITIONS
165 * ocu Pointer to OSTA Compressed Unicode output
166 * buffer of size UDF_NAME_LEN bytes.
167 * utf Pointer to UTF-8 input buffer.
168 * utf_len Length of UTF-8 input buffer in bytes.
169 *
170 * POST-CONDITIONS
171 * <return> Zero on success.
172 *
173 * HISTORY
174 * November 12, 1997 - Andrew E. Mileski
175 * Written, tested, and released.
176 */
177static int udf_UTF8toCS0(dstring *ocu, struct ustr *utf, int length)
178{ 108{
179 unsigned c, i, max_val, utf_char; 109 unsigned int utf_char;
180 int utf_cnt, u_len, u_ch; 110 unsigned char c;
111 int utf_cnt, u_len;
181 112
182 memset(ocu, 0, sizeof(dstring) * length); 113 utf_char = 0;
183 ocu[0] = 8; 114 utf_cnt = 0;
184 max_val = 0xffU; 115 for (u_len = 0; u_len < boundlen;) {
185 u_ch = 1; 116 c = in[u_len++];
186
187try_again:
188 u_len = 0U;
189 utf_char = 0U;
190 utf_cnt = 0U;
191 for (i = 0U; i < utf->u_len; i++) {
192 /* Name didn't fit? */
193 if (u_len + 1 + u_ch >= length)
194 return 0;
195
196 c = (uint8_t)utf->u_name[i];
197 117
198 /* Complete a multi-byte UTF-8 character */ 118 /* Complete a multi-byte UTF-8 character */
199 if (utf_cnt) { 119 if (utf_cnt) {
200 utf_char = (utf_char << 6) | (c & 0x3fU); 120 utf_char = (utf_char << 6) | (c & 0x3f);
201 if (--utf_cnt) 121 if (--utf_cnt)
202 continue; 122 continue;
203 } else { 123 } else {
204 /* Check for a multi-byte UTF-8 character */ 124 /* Check for a multi-byte UTF-8 character */
205 if (c & 0x80U) { 125 if (c & 0x80) {
206 /* Start a multi-byte UTF-8 character */ 126 /* Start a multi-byte UTF-8 character */
207 if ((c & 0xe0U) == 0xc0U) { 127 if ((c & 0xe0) == 0xc0) {
208 utf_char = c & 0x1fU; 128 utf_char = c & 0x1f;
209 utf_cnt = 1; 129 utf_cnt = 1;
210 } else if ((c & 0xf0U) == 0xe0U) { 130 } else if ((c & 0xf0) == 0xe0) {
211 utf_char = c & 0x0fU; 131 utf_char = c & 0x0f;
212 utf_cnt = 2; 132 utf_cnt = 2;
213 } else if ((c & 0xf8U) == 0xf0U) { 133 } else if ((c & 0xf8) == 0xf0) {
214 utf_char = c & 0x07U; 134 utf_char = c & 0x07;
215 utf_cnt = 3; 135 utf_cnt = 3;
216 } else if ((c & 0xfcU) == 0xf8U) { 136 } else if ((c & 0xfc) == 0xf8) {
217 utf_char = c & 0x03U; 137 utf_char = c & 0x03;
218 utf_cnt = 4; 138 utf_cnt = 4;
219 } else if ((c & 0xfeU) == 0xfcU) { 139 } else if ((c & 0xfe) == 0xfc) {
220 utf_char = c & 0x01U; 140 utf_char = c & 0x01;
221 utf_cnt = 5; 141 utf_cnt = 5;
222 } else { 142 } else {
223 goto error_out; 143 utf_cnt = -1;
144 break;
224 } 145 }
225 continue; 146 continue;
226 } else { 147 } else {
@@ -228,36 +149,19 @@ try_again:
228 utf_char = c; 149 utf_char = c;
229 } 150 }
230 } 151 }
231 152 *uni = utf_char;
232 /* Choose no compression if necessary */ 153 break;
233 if (utf_char > max_val) {
234 if (max_val == 0xffU) {
235 max_val = 0xffffU;
236 ocu[0] = (uint8_t)0x10U;
237 u_ch = 2;
238 goto try_again;
239 }
240 goto error_out;
241 }
242
243 if (max_val == 0xffffU)
244 ocu[++u_len] = (uint8_t)(utf_char >> 8);
245 ocu[++u_len] = (uint8_t)(utf_char & 0xffU);
246 } 154 }
247
248 if (utf_cnt) { 155 if (utf_cnt) {
249error_out: 156 *uni = '?';
250 ocu[++u_len] = '?'; 157 return -EINVAL;
251 printk(KERN_DEBUG pr_fmt("bad UTF-8 character\n"));
252 } 158 }
253 159 return u_len;
254 ocu[length - 1] = (uint8_t)u_len + 1;
255
256 return u_len + 1;
257} 160}
258 161
259static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o, 162static int udf_name_from_CS0(struct ustr *utf_o,
260 const struct ustr *ocu_i) 163 const struct ustr *ocu_i,
164 int (*conv_f)(wchar_t, unsigned char *, int))
261{ 165{
262 const uint8_t *ocu; 166 const uint8_t *ocu;
263 uint8_t cmp_id, ocu_len; 167 uint8_t cmp_id, ocu_len;
@@ -286,11 +190,13 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
286 if (cmp_id == 16) 190 if (cmp_id == 16)
287 c = (c << 8) | ocu[i++]; 191 c = (c << 8) | ocu[i++];
288 192
289 len = nls->uni2char(c, &utf_o->u_name[utf_o->u_len], 193 len = conv_f(c, &utf_o->u_name[utf_o->u_len],
290 UDF_NAME_LEN - 2 - utf_o->u_len); 194 UDF_NAME_LEN - 2 - utf_o->u_len);
291 /* Valid character? */ 195 /* Valid character? */
292 if (len >= 0) 196 if (len >= 0)
293 utf_o->u_len += len; 197 utf_o->u_len += len;
198 else if (len == -ENAMETOOLONG)
199 break;
294 else 200 else
295 utf_o->u_name[utf_o->u_len++] = '?'; 201 utf_o->u_name[utf_o->u_len++] = '?';
296 } 202 }
@@ -299,26 +205,26 @@ static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
299 return utf_o->u_len; 205 return utf_o->u_len;
300} 206}
301 207
302static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni, 208static int udf_name_to_CS0(dstring *ocu, struct ustr *uni, int length,
303 int length) 209 int (*conv_f)(const unsigned char *, int, wchar_t *))
304{ 210{
305 int len; 211 int i, len;
306 unsigned i, max_val; 212 unsigned int max_val;
307 uint16_t uni_char; 213 wchar_t uni_char;
308 int u_len, u_ch; 214 int u_len, u_ch;
309 215
310 memset(ocu, 0, sizeof(dstring) * length); 216 memset(ocu, 0, sizeof(dstring) * length);
311 ocu[0] = 8; 217 ocu[0] = 8;
312 max_val = 0xffU; 218 max_val = 0xff;
313 u_ch = 1; 219 u_ch = 1;
314 220
315try_again: 221try_again:
316 u_len = 0U; 222 u_len = 0;
317 for (i = 0U; i < uni->u_len; i++) { 223 for (i = 0; i < uni->u_len; i++) {
318 /* Name didn't fit? */ 224 /* Name didn't fit? */
319 if (u_len + 1 + u_ch >= length) 225 if (u_len + 1 + u_ch >= length)
320 return 0; 226 return 0;
321 len = nls->char2uni(&uni->u_name[i], uni->u_len - i, &uni_char); 227 len = conv_f(&uni->u_name[i], uni->u_len - i, &uni_char);
322 if (!len) 228 if (!len)
323 continue; 229 continue;
324 /* Invalid character, deal with it */ 230 /* Invalid character, deal with it */
@@ -328,15 +234,15 @@ try_again:
328 } 234 }
329 235
330 if (uni_char > max_val) { 236 if (uni_char > max_val) {
331 max_val = 0xffffU; 237 max_val = 0xffff;
332 ocu[0] = (uint8_t)0x10U; 238 ocu[0] = 0x10;
333 u_ch = 2; 239 u_ch = 2;
334 goto try_again; 240 goto try_again;
335 } 241 }
336 242
337 if (max_val == 0xffffU) 243 if (max_val == 0xffff)
338 ocu[++u_len] = (uint8_t)(uni_char >> 8); 244 ocu[++u_len] = (uint8_t)(uni_char >> 8);
339 ocu[++u_len] = (uint8_t)(uni_char & 0xffU); 245 ocu[++u_len] = (uint8_t)(uni_char & 0xff);
340 i += len - 1; 246 i += len - 1;
341 } 247 }
342 248
@@ -344,10 +250,16 @@ try_again:
344 return u_len + 1; 250 return u_len + 1;
345} 251}
346 252
253int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i)
254{
255 return udf_name_from_CS0(utf_o, ocu_i, udf_uni2char_utf8);
256}
257
347int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen, 258int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen,
348 uint8_t *dname, int dlen) 259 uint8_t *dname, int dlen)
349{ 260{
350 struct ustr *filename, *unifilename; 261 struct ustr *filename, *unifilename;
262 int (*conv_f)(wchar_t, unsigned char *, int);
351 int ret; 263 int ret;
352 264
353 if (!slen) 265 if (!slen)
@@ -365,23 +277,18 @@ int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen,
365 277
366 udf_build_ustr_exact(unifilename, sname, slen); 278 udf_build_ustr_exact(unifilename, sname, slen);
367 if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { 279 if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
368 ret = udf_CS0toUTF8(filename, unifilename); 280 conv_f = udf_uni2char_utf8;
369 if (ret < 0) {
370 udf_debug("Failed in udf_get_filename: sname = %s\n",
371 sname);
372 goto out2;
373 }
374 } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { 281 } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
375 ret = udf_CS0toNLS(UDF_SB(sb)->s_nls_map, filename, 282 conv_f = UDF_SB(sb)->s_nls_map->uni2char;
376 unifilename);
377 if (ret < 0) {
378 udf_debug("Failed in udf_get_filename: sname = %s\n",
379 sname);
380 goto out2;
381 }
382 } else 283 } else
383 BUG(); 284 BUG();
384 285
286 ret = udf_name_from_CS0(filename, unifilename, conv_f);
287 if (ret < 0) {
288 udf_debug("Failed in udf_get_filename: sname = %s\n", sname);
289 goto out2;
290 }
291
385 ret = udf_translate_to_linux(dname, dlen, 292 ret = udf_translate_to_linux(dname, dlen,
386 filename->u_name, filename->u_len, 293 filename->u_name, filename->u_len,
387 unifilename->u_name, unifilename->u_len); 294 unifilename->u_name, unifilename->u_len);
@@ -399,24 +306,19 @@ int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen,
399 uint8_t *dname, int dlen) 306 uint8_t *dname, int dlen)
400{ 307{
401 struct ustr unifilename; 308 struct ustr unifilename;
402 int namelen; 309 int (*conv_f)(const unsigned char *, int, wchar_t *);
403 310
404 if (!udf_char_to_ustr(&unifilename, sname, slen)) 311 if (!udf_char_to_ustr(&unifilename, sname, slen))
405 return 0; 312 return 0;
406 313
407 if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { 314 if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
408 namelen = udf_UTF8toCS0(dname, &unifilename, dlen); 315 conv_f = udf_char2uni_utf8;
409 if (!namelen)
410 return 0;
411 } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { 316 } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
412 namelen = udf_NLStoCS0(UDF_SB(sb)->s_nls_map, dname, 317 conv_f = UDF_SB(sb)->s_nls_map->char2uni;
413 &unifilename, dlen);
414 if (!namelen)
415 return 0;
416 } else 318 } else
417 return 0; 319 BUG();
418 320
419 return namelen; 321 return udf_name_to_CS0(dname, &unifilename, dlen, conv_f);
420} 322}
421 323
422#define ILLEGAL_CHAR_MARK '_' 324#define ILLEGAL_CHAR_MARK '_'