diff options
author | Andrew Gabbasov <andrew_gabbasov@mentor.com> | 2016-01-15 03:44:23 -0500 |
---|---|---|
committer | Jan Kara <jack@suse.cz> | 2016-02-09 07:05:23 -0500 |
commit | 484a10f49387e4386bf2708532e75bf78ffea2cb (patch) | |
tree | f9d39341f8f6bcf20dfd6ca6573fa46ea622843c /fs/udf | |
parent | 9293fcfbc1812a22ad5ce1b542eb90c1bbe01be1 (diff) |
udf: Merge linux specific translation into CS0 conversion function
The current implementation of the udf_translate_to_linux function does not
support multi-byte characters at all: it counts bytes while calculating
the extension length, and when inserting the CRC inside the name it doesn't
take character boundaries into account and can break into
the middle of a character.
The most efficient way to properly support multi-byte characters is
to merge the translation operations directly into the conversion function.
This helps to avoid extra passes along the string or parsing
a multi-byte character back into Unicode to find out its length.
Signed-off-by: Andrew Gabbasov <andrew_gabbasov@mentor.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs/udf')
-rw-r--r-- | fs/udf/unicode.c | 280 |
1 files changed, 152 insertions, 128 deletions
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index dc5990f4c952..3ff42f4437f3 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c | |||
@@ -28,9 +28,6 @@ | |||
28 | 28 | ||
29 | #include "udf_sb.h" | 29 | #include "udf_sb.h" |
30 | 30 | ||
31 | static int udf_translate_to_linux(uint8_t *, int, const uint8_t *, int, | ||
32 | const uint8_t *, int); | ||
33 | |||
34 | static int udf_uni2char_utf8(wchar_t uni, | 31 | static int udf_uni2char_utf8(wchar_t uni, |
35 | unsigned char *out, | 32 | unsigned char *out, |
36 | int boundlen) | 33 | int boundlen) |
@@ -114,13 +111,83 @@ static int udf_char2uni_utf8(const unsigned char *in, | |||
114 | return u_len; | 111 | return u_len; |
115 | } | 112 | } |
116 | 113 | ||
114 | #define ILLEGAL_CHAR_MARK '_' | ||
115 | #define EXT_MARK '.' | ||
116 | #define CRC_MARK '#' | ||
117 | #define EXT_SIZE 5 | ||
118 | /* Number of chars we need to store generated CRC to make filename unique */ | ||
119 | #define CRC_LEN 5 | ||
120 | |||
121 | static int udf_name_conv_char(uint8_t *str_o, int str_o_max_len, | ||
122 | int *str_o_idx, | ||
123 | const uint8_t *str_i, int str_i_max_len, | ||
124 | int *str_i_idx, | ||
125 | int u_ch, int *needsCRC, | ||
126 | int (*conv_f)(wchar_t, unsigned char *, int), | ||
127 | int translate) | ||
128 | { | ||
129 | uint32_t c; | ||
130 | int illChar = 0; | ||
131 | int len, gotch = 0; | ||
132 | |||
133 | for (; (!gotch) && (*str_i_idx < str_i_max_len); *str_i_idx += u_ch) { | ||
134 | if (*str_o_idx >= str_o_max_len) { | ||
135 | *needsCRC = 1; | ||
136 | return gotch; | ||
137 | } | ||
138 | |||
139 | /* Expand OSTA compressed Unicode to Unicode */ | ||
140 | c = str_i[*str_i_idx]; | ||
141 | if (u_ch > 1) | ||
142 | c = (c << 8) | str_i[*str_i_idx + 1]; | ||
143 | |||
144 | if (translate && (c == '/' || c == 0)) | ||
145 | illChar = 1; | ||
146 | else if (illChar) | ||
147 | break; | ||
148 | else | ||
149 | gotch = 1; | ||
150 | } | ||
151 | if (illChar) { | ||
152 | *needsCRC = 1; | ||
153 | c = ILLEGAL_CHAR_MARK; | ||
154 | gotch = 1; | ||
155 | } | ||
156 | if (gotch) { | ||
157 | len = conv_f(c, &str_o[*str_o_idx], str_o_max_len - *str_o_idx); | ||
158 | /* Valid character? */ | ||
159 | if (len >= 0) | ||
160 | *str_o_idx += len; | ||
161 | else if (len == -ENAMETOOLONG) { | ||
162 | *needsCRC = 1; | ||
163 | gotch = 0; | ||
164 | } else { | ||
165 | str_o[(*str_o_idx)++] = '?'; | ||
166 | *needsCRC = 1; | ||
167 | } | ||
168 | } | ||
169 | return gotch; | ||
170 | } | ||
171 | |||
117 | static int udf_name_from_CS0(uint8_t *str_o, int str_max_len, | 172 | static int udf_name_from_CS0(uint8_t *str_o, int str_max_len, |
118 | const uint8_t *ocu, int ocu_len, | 173 | const uint8_t *ocu, int ocu_len, |
119 | int (*conv_f)(wchar_t, unsigned char *, int)) | 174 | int (*conv_f)(wchar_t, unsigned char *, int), |
175 | int translate) | ||
120 | { | 176 | { |
177 | uint32_t c; | ||
121 | uint8_t cmp_id; | 178 | uint8_t cmp_id; |
122 | int i, len; | 179 | int idx, len; |
123 | int str_o_len = 0; | 180 | int u_ch; |
181 | int needsCRC = 0; | ||
182 | int ext_i_len, ext_max_len; | ||
183 | int str_o_len = 0; /* Length of resulting output */ | ||
184 | int ext_o_len = 0; /* Extension output length */ | ||
185 | int ext_crc_len = 0; /* Extension output length if used with CRC */ | ||
186 | int i_ext = -1; /* Extension position in input buffer */ | ||
187 | int o_crc = 0; /* Rightmost possible output pos for CRC+ext */ | ||
188 | unsigned short valueCRC; | ||
189 | uint8_t ext[EXT_SIZE * NLS_MAX_CHARSET_SIZE + 1]; | ||
190 | uint8_t crc[CRC_LEN]; | ||
124 | 191 | ||
125 | if (str_max_len <= 0) | 192 | if (str_max_len <= 0) |
126 | return 0; | 193 | return 0; |
@@ -133,24 +200,88 @@ static int udf_name_from_CS0(uint8_t *str_o, int str_max_len, | |||
133 | cmp_id = ocu[0]; | 200 | cmp_id = ocu[0]; |
134 | if (cmp_id != 8 && cmp_id != 16) { | 201 | if (cmp_id != 8 && cmp_id != 16) { |
135 | memset(str_o, 0, str_max_len); | 202 | memset(str_o, 0, str_max_len); |
136 | pr_err("unknown compression code (%d) stri=%s\n", cmp_id, ocu); | 203 | pr_err("unknown compression code (%d)\n", cmp_id); |
137 | return -EINVAL; | 204 | return -EINVAL; |
138 | } | 205 | } |
206 | u_ch = cmp_id >> 3; | ||
139 | 207 | ||
140 | for (i = 1; (i < ocu_len) && (str_o_len < str_max_len);) { | 208 | ocu++; |
141 | /* Expand OSTA compressed Unicode to Unicode */ | 209 | ocu_len--; |
142 | uint32_t c = ocu[i++]; | ||
143 | if (cmp_id == 16) | ||
144 | c = (c << 8) | ocu[i++]; | ||
145 | 210 | ||
146 | len = conv_f(c, &str_o[str_o_len], str_max_len - str_o_len); | 211 | if (ocu_len % u_ch) { |
147 | /* Valid character? */ | 212 | pr_err("incorrect filename length (%d)\n", ocu_len + 1); |
148 | if (len >= 0) | 213 | return -EINVAL; |
149 | str_o_len += len; | 214 | } |
150 | else if (len == -ENAMETOOLONG) | 215 | |
216 | if (translate) { | ||
217 | /* Look for extension */ | ||
218 | for (idx = ocu_len - u_ch, ext_i_len = 0; | ||
219 | (idx >= 0) && (ext_i_len < EXT_SIZE); | ||
220 | idx -= u_ch, ext_i_len++) { | ||
221 | c = ocu[idx]; | ||
222 | if (u_ch > 1) | ||
223 | c = (c << 8) | ocu[idx + 1]; | ||
224 | |||
225 | if (c == EXT_MARK) { | ||
226 | if (ext_i_len) | ||
227 | i_ext = idx; | ||
228 | break; | ||
229 | } | ||
230 | } | ||
231 | if (i_ext >= 0) { | ||
232 | /* Convert extension */ | ||
233 | ext_max_len = min_t(int, sizeof(ext), str_max_len); | ||
234 | ext[ext_o_len++] = EXT_MARK; | ||
235 | idx = i_ext + u_ch; | ||
236 | while (udf_name_conv_char(ext, ext_max_len, &ext_o_len, | ||
237 | ocu, ocu_len, &idx, | ||
238 | u_ch, &needsCRC, | ||
239 | conv_f, translate)) { | ||
240 | if ((ext_o_len + CRC_LEN) < str_max_len) | ||
241 | ext_crc_len = ext_o_len; | ||
242 | } | ||
243 | } | ||
244 | } | ||
245 | |||
246 | idx = 0; | ||
247 | while (1) { | ||
248 | if (translate && (idx == i_ext)) { | ||
249 | if (str_o_len > (str_max_len - ext_o_len)) | ||
250 | needsCRC = 1; | ||
151 | break; | 251 | break; |
152 | else | 252 | } |
153 | str_o[str_o_len++] = '?'; | 253 | |
254 | if (!udf_name_conv_char(str_o, str_max_len, &str_o_len, | ||
255 | ocu, ocu_len, &idx, | ||
256 | u_ch, &needsCRC, conv_f, translate)) | ||
257 | break; | ||
258 | |||
259 | if (translate && | ||
260 | (str_o_len <= (str_max_len - ext_o_len - CRC_LEN))) | ||
261 | o_crc = str_o_len; | ||
262 | } | ||
263 | |||
264 | if (translate) { | ||
265 | if (str_o_len <= 2 && str_o[0] == '.' && | ||
266 | (str_o_len == 1 || str_o[1] == '.')) | ||
267 | needsCRC = 1; | ||
268 | if (needsCRC) { | ||
269 | str_o_len = o_crc; | ||
270 | valueCRC = crc_itu_t(0, ocu, ocu_len); | ||
271 | crc[0] = CRC_MARK; | ||
272 | crc[1] = hex_asc_upper_hi(valueCRC >> 8); | ||
273 | crc[2] = hex_asc_upper_lo(valueCRC >> 8); | ||
274 | crc[3] = hex_asc_upper_hi(valueCRC); | ||
275 | crc[4] = hex_asc_upper_lo(valueCRC); | ||
276 | len = min_t(int, CRC_LEN, str_max_len - str_o_len); | ||
277 | memcpy(&str_o[str_o_len], crc, len); | ||
278 | str_o_len += len; | ||
279 | ext_o_len = ext_crc_len; | ||
280 | } | ||
281 | if (ext_o_len > 0) { | ||
282 | memcpy(&str_o[str_o_len], ext, ext_o_len); | ||
283 | str_o_len += ext_o_len; | ||
284 | } | ||
154 | } | 285 | } |
155 | 286 | ||
156 | return str_o_len; | 287 | return str_o_len; |
@@ -207,13 +338,12 @@ try_again: | |||
207 | int udf_CS0toUTF8(uint8_t *utf_o, int o_len, const uint8_t *ocu_i, int i_len) | 338 | int udf_CS0toUTF8(uint8_t *utf_o, int o_len, const uint8_t *ocu_i, int i_len) |
208 | { | 339 | { |
209 | return udf_name_from_CS0(utf_o, o_len, ocu_i, i_len, | 340 | return udf_name_from_CS0(utf_o, o_len, ocu_i, i_len, |
210 | udf_uni2char_utf8); | 341 | udf_uni2char_utf8, 0); |
211 | } | 342 | } |
212 | 343 | ||
213 | int udf_get_filename(struct super_block *sb, const uint8_t *sname, int slen, | 344 | int udf_get_filename(struct super_block *sb, const uint8_t *sname, int slen, |
214 | uint8_t *dname, int dlen) | 345 | uint8_t *dname, int dlen) |
215 | { | 346 | { |
216 | uint8_t *filename; | ||
217 | int (*conv_f)(wchar_t, unsigned char *, int); | 347 | int (*conv_f)(wchar_t, unsigned char *, int); |
218 | int ret; | 348 | int ret; |
219 | 349 | ||
@@ -223,10 +353,6 @@ int udf_get_filename(struct super_block *sb, const uint8_t *sname, int slen, | |||
223 | if (dlen <= 0) | 353 | if (dlen <= 0) |
224 | return 0; | 354 | return 0; |
225 | 355 | ||
226 | filename = kmalloc(dlen, GFP_NOFS); | ||
227 | if (!filename) | ||
228 | return -ENOMEM; | ||
229 | |||
230 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { | 356 | if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { |
231 | conv_f = udf_uni2char_utf8; | 357 | conv_f = udf_uni2char_utf8; |
232 | } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { | 358 | } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { |
@@ -234,19 +360,10 @@ int udf_get_filename(struct super_block *sb, const uint8_t *sname, int slen, | |||
234 | } else | 360 | } else |
235 | BUG(); | 361 | BUG(); |
236 | 362 | ||
237 | ret = udf_name_from_CS0(filename, dlen, sname, slen, conv_f); | 363 | ret = udf_name_from_CS0(dname, dlen, sname, slen, conv_f, 1); |
238 | if (ret < 0) { | ||
239 | udf_debug("Failed in udf_get_filename: sname = %s\n", sname); | ||
240 | goto out2; | ||
241 | } | ||
242 | |||
243 | ret = udf_translate_to_linux(dname, dlen, filename, dlen, | ||
244 | sname + 1, slen - 1); | ||
245 | /* Zero length filename isn't valid... */ | 364 | /* Zero length filename isn't valid... */ |
246 | if (ret == 0) | 365 | if (ret == 0) |
247 | ret = -EINVAL; | 366 | ret = -EINVAL; |
248 | out2: | ||
249 | kfree(filename); | ||
250 | return ret; | 367 | return ret; |
251 | } | 368 | } |
252 | 369 | ||
@@ -265,96 +382,3 @@ int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen, | |||
265 | return udf_name_to_CS0(dname, dlen, sname, slen, conv_f); | 382 | return udf_name_to_CS0(dname, dlen, sname, slen, conv_f); |
266 | } | 383 | } |
267 | 384 | ||
268 | #define ILLEGAL_CHAR_MARK '_' | ||
269 | #define EXT_MARK '.' | ||
270 | #define CRC_MARK '#' | ||
271 | #define EXT_SIZE 5 | ||
272 | /* Number of chars we need to store generated CRC to make filename unique */ | ||
273 | #define CRC_LEN 5 | ||
274 | |||
275 | static int udf_translate_to_linux(uint8_t *newName, int newLen, | ||
276 | const uint8_t *udfName, int udfLen, | ||
277 | const uint8_t *fidName, int fidNameLen) | ||
278 | { | ||
279 | int index, newIndex = 0, needsCRC = 0; | ||
280 | int extIndex = 0, newExtIndex = 0, hasExt = 0; | ||
281 | unsigned short valueCRC; | ||
282 | uint8_t curr; | ||
283 | |||
284 | if (udfName[0] == '.' && | ||
285 | (udfLen == 1 || (udfLen == 2 && udfName[1] == '.'))) { | ||
286 | needsCRC = 1; | ||
287 | newIndex = udfLen; | ||
288 | memcpy(newName, udfName, udfLen); | ||
289 | } else { | ||
290 | for (index = 0; index < udfLen; index++) { | ||
291 | curr = udfName[index]; | ||
292 | if (curr == '/' || curr == 0) { | ||
293 | needsCRC = 1; | ||
294 | curr = ILLEGAL_CHAR_MARK; | ||
295 | while (index + 1 < udfLen && | ||
296 | (udfName[index + 1] == '/' || | ||
297 | udfName[index + 1] == 0)) | ||
298 | index++; | ||
299 | } | ||
300 | if (curr == EXT_MARK && | ||
301 | (udfLen - index - 1) <= EXT_SIZE) { | ||
302 | if (udfLen == index + 1) | ||
303 | hasExt = 0; | ||
304 | else { | ||
305 | hasExt = 1; | ||
306 | extIndex = index; | ||
307 | newExtIndex = newIndex; | ||
308 | } | ||
309 | } | ||
310 | if (newIndex < newLen) | ||
311 | newName[newIndex++] = curr; | ||
312 | else | ||
313 | needsCRC = 1; | ||
314 | } | ||
315 | } | ||
316 | if (needsCRC) { | ||
317 | uint8_t ext[EXT_SIZE]; | ||
318 | int localExtIndex = 0; | ||
319 | |||
320 | if (hasExt) { | ||
321 | int maxFilenameLen; | ||
322 | for (index = 0; | ||
323 | index < EXT_SIZE && extIndex + index + 1 < udfLen; | ||
324 | index++) { | ||
325 | curr = udfName[extIndex + index + 1]; | ||
326 | |||
327 | if (curr == '/' || curr == 0) { | ||
328 | needsCRC = 1; | ||
329 | curr = ILLEGAL_CHAR_MARK; | ||
330 | while (extIndex + index + 2 < udfLen && | ||
331 | (index + 1 < EXT_SIZE && | ||
332 | (udfName[extIndex + index + 2] == '/' || | ||
333 | udfName[extIndex + index + 2] == 0))) | ||
334 | index++; | ||
335 | } | ||
336 | ext[localExtIndex++] = curr; | ||
337 | } | ||
338 | maxFilenameLen = newLen - CRC_LEN - localExtIndex; | ||
339 | if (newIndex > maxFilenameLen) | ||
340 | newIndex = maxFilenameLen; | ||
341 | else | ||
342 | newIndex = newExtIndex; | ||
343 | } else if (newIndex > newLen - CRC_LEN) | ||
344 | newIndex = newLen - CRC_LEN; | ||
345 | newName[newIndex++] = CRC_MARK; | ||
346 | valueCRC = crc_itu_t(0, fidName, fidNameLen); | ||
347 | newName[newIndex++] = hex_asc_upper_hi(valueCRC >> 8); | ||
348 | newName[newIndex++] = hex_asc_upper_lo(valueCRC >> 8); | ||
349 | newName[newIndex++] = hex_asc_upper_hi(valueCRC); | ||
350 | newName[newIndex++] = hex_asc_upper_lo(valueCRC); | ||
351 | |||
352 | if (hasExt) { | ||
353 | newName[newIndex++] = EXT_MARK; | ||
354 | for (index = 0; index < localExtIndex; index++) | ||
355 | newName[newIndex++] = ext[index]; | ||
356 | } | ||
357 | } | ||
358 | |||
359 | return newIndex; | ||
360 | } | ||