aboutsummaryrefslogtreecommitdiffstats
path: root/fs/udf
diff options
context:
space:
mode:
authorAndrew Gabbasov <andrew_gabbasov@mentor.com>2016-01-15 03:44:23 -0500
committerJan Kara <jack@suse.cz>2016-02-09 07:05:23 -0500
commit484a10f49387e4386bf2708532e75bf78ffea2cb (patch)
treef9d39341f8f6bcf20dfd6ca6573fa46ea622843c /fs/udf
parent9293fcfbc1812a22ad5ce1b542eb90c1bbe01be1 (diff)
udf: Merge linux specific translation into CS0 conversion function
The current implementation of the udf_translate_to_linux function does not support multi-byte characters at all: it counts bytes while calculating the extension length, and when inserting the CRC inside the name it does not take inter-character boundaries into account, so it can break into the middle of a character. The most efficient way to properly support multi-byte characters is to merge the translation operations directly into the conversion function. This avoids extra passes along the string, and avoids parsing a multi-byte character back into Unicode just to find out its length.
Signed-off-by: Andrew Gabbasov <andrew_gabbasov@mentor.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Diffstat (limited to 'fs/udf')
-rw-r--r--fs/udf/unicode.c280
1 files changed, 152 insertions, 128 deletions
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index dc5990f4c952..3ff42f4437f3 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -28,9 +28,6 @@
28 28
29#include "udf_sb.h" 29#include "udf_sb.h"
30 30
31static int udf_translate_to_linux(uint8_t *, int, const uint8_t *, int,
32 const uint8_t *, int);
33
34static int udf_uni2char_utf8(wchar_t uni, 31static int udf_uni2char_utf8(wchar_t uni,
35 unsigned char *out, 32 unsigned char *out,
36 int boundlen) 33 int boundlen)
@@ -114,13 +111,83 @@ static int udf_char2uni_utf8(const unsigned char *in,
114 return u_len; 111 return u_len;
115} 112}
116 113
114#define ILLEGAL_CHAR_MARK '_'
115#define EXT_MARK '.'
116#define CRC_MARK '#'
117#define EXT_SIZE 5
118/* Number of chars we need to store generated CRC to make filename unique */
119#define CRC_LEN 5
120
121static int udf_name_conv_char(uint8_t *str_o, int str_o_max_len,
122 int *str_o_idx,
123 const uint8_t *str_i, int str_i_max_len,
124 int *str_i_idx,
125 int u_ch, int *needsCRC,
126 int (*conv_f)(wchar_t, unsigned char *, int),
127 int translate)
128{
129 uint32_t c;
130 int illChar = 0;
131 int len, gotch = 0;
132
133 for (; (!gotch) && (*str_i_idx < str_i_max_len); *str_i_idx += u_ch) {
134 if (*str_o_idx >= str_o_max_len) {
135 *needsCRC = 1;
136 return gotch;
137 }
138
139 /* Expand OSTA compressed Unicode to Unicode */
140 c = str_i[*str_i_idx];
141 if (u_ch > 1)
142 c = (c << 8) | str_i[*str_i_idx + 1];
143
144 if (translate && (c == '/' || c == 0))
145 illChar = 1;
146 else if (illChar)
147 break;
148 else
149 gotch = 1;
150 }
151 if (illChar) {
152 *needsCRC = 1;
153 c = ILLEGAL_CHAR_MARK;
154 gotch = 1;
155 }
156 if (gotch) {
157 len = conv_f(c, &str_o[*str_o_idx], str_o_max_len - *str_o_idx);
158 /* Valid character? */
159 if (len >= 0)
160 *str_o_idx += len;
161 else if (len == -ENAMETOOLONG) {
162 *needsCRC = 1;
163 gotch = 0;
164 } else {
165 str_o[(*str_o_idx)++] = '?';
166 *needsCRC = 1;
167 }
168 }
169 return gotch;
170}
171
117static int udf_name_from_CS0(uint8_t *str_o, int str_max_len, 172static int udf_name_from_CS0(uint8_t *str_o, int str_max_len,
118 const uint8_t *ocu, int ocu_len, 173 const uint8_t *ocu, int ocu_len,
119 int (*conv_f)(wchar_t, unsigned char *, int)) 174 int (*conv_f)(wchar_t, unsigned char *, int),
175 int translate)
120{ 176{
177 uint32_t c;
121 uint8_t cmp_id; 178 uint8_t cmp_id;
122 int i, len; 179 int idx, len;
123 int str_o_len = 0; 180 int u_ch;
181 int needsCRC = 0;
182 int ext_i_len, ext_max_len;
183 int str_o_len = 0; /* Length of resulting output */
184 int ext_o_len = 0; /* Extension output length */
185 int ext_crc_len = 0; /* Extension output length if used with CRC */
186 int i_ext = -1; /* Extension position in input buffer */
187 int o_crc = 0; /* Rightmost possible output pos for CRC+ext */
188 unsigned short valueCRC;
189 uint8_t ext[EXT_SIZE * NLS_MAX_CHARSET_SIZE + 1];
190 uint8_t crc[CRC_LEN];
124 191
125 if (str_max_len <= 0) 192 if (str_max_len <= 0)
126 return 0; 193 return 0;
@@ -133,24 +200,88 @@ static int udf_name_from_CS0(uint8_t *str_o, int str_max_len,
133 cmp_id = ocu[0]; 200 cmp_id = ocu[0];
134 if (cmp_id != 8 && cmp_id != 16) { 201 if (cmp_id != 8 && cmp_id != 16) {
135 memset(str_o, 0, str_max_len); 202 memset(str_o, 0, str_max_len);
136 pr_err("unknown compression code (%d) stri=%s\n", cmp_id, ocu); 203 pr_err("unknown compression code (%d)\n", cmp_id);
137 return -EINVAL; 204 return -EINVAL;
138 } 205 }
206 u_ch = cmp_id >> 3;
139 207
140 for (i = 1; (i < ocu_len) && (str_o_len < str_max_len);) { 208 ocu++;
141 /* Expand OSTA compressed Unicode to Unicode */ 209 ocu_len--;
142 uint32_t c = ocu[i++];
143 if (cmp_id == 16)
144 c = (c << 8) | ocu[i++];
145 210
146 len = conv_f(c, &str_o[str_o_len], str_max_len - str_o_len); 211 if (ocu_len % u_ch) {
147 /* Valid character? */ 212 pr_err("incorrect filename length (%d)\n", ocu_len + 1);
148 if (len >= 0) 213 return -EINVAL;
149 str_o_len += len; 214 }
150 else if (len == -ENAMETOOLONG) 215
216 if (translate) {
217 /* Look for extension */
218 for (idx = ocu_len - u_ch, ext_i_len = 0;
219 (idx >= 0) && (ext_i_len < EXT_SIZE);
220 idx -= u_ch, ext_i_len++) {
221 c = ocu[idx];
222 if (u_ch > 1)
223 c = (c << 8) | ocu[idx + 1];
224
225 if (c == EXT_MARK) {
226 if (ext_i_len)
227 i_ext = idx;
228 break;
229 }
230 }
231 if (i_ext >= 0) {
232 /* Convert extension */
233 ext_max_len = min_t(int, sizeof(ext), str_max_len);
234 ext[ext_o_len++] = EXT_MARK;
235 idx = i_ext + u_ch;
236 while (udf_name_conv_char(ext, ext_max_len, &ext_o_len,
237 ocu, ocu_len, &idx,
238 u_ch, &needsCRC,
239 conv_f, translate)) {
240 if ((ext_o_len + CRC_LEN) < str_max_len)
241 ext_crc_len = ext_o_len;
242 }
243 }
244 }
245
246 idx = 0;
247 while (1) {
248 if (translate && (idx == i_ext)) {
249 if (str_o_len > (str_max_len - ext_o_len))
250 needsCRC = 1;
151 break; 251 break;
152 else 252 }
153 str_o[str_o_len++] = '?'; 253
254 if (!udf_name_conv_char(str_o, str_max_len, &str_o_len,
255 ocu, ocu_len, &idx,
256 u_ch, &needsCRC, conv_f, translate))
257 break;
258
259 if (translate &&
260 (str_o_len <= (str_max_len - ext_o_len - CRC_LEN)))
261 o_crc = str_o_len;
262 }
263
264 if (translate) {
265 if (str_o_len <= 2 && str_o[0] == '.' &&
266 (str_o_len == 1 || str_o[1] == '.'))
267 needsCRC = 1;
268 if (needsCRC) {
269 str_o_len = o_crc;
270 valueCRC = crc_itu_t(0, ocu, ocu_len);
271 crc[0] = CRC_MARK;
272 crc[1] = hex_asc_upper_hi(valueCRC >> 8);
273 crc[2] = hex_asc_upper_lo(valueCRC >> 8);
274 crc[3] = hex_asc_upper_hi(valueCRC);
275 crc[4] = hex_asc_upper_lo(valueCRC);
276 len = min_t(int, CRC_LEN, str_max_len - str_o_len);
277 memcpy(&str_o[str_o_len], crc, len);
278 str_o_len += len;
279 ext_o_len = ext_crc_len;
280 }
281 if (ext_o_len > 0) {
282 memcpy(&str_o[str_o_len], ext, ext_o_len);
283 str_o_len += ext_o_len;
284 }
154 } 285 }
155 286
156 return str_o_len; 287 return str_o_len;
@@ -207,13 +338,12 @@ try_again:
207int udf_CS0toUTF8(uint8_t *utf_o, int o_len, const uint8_t *ocu_i, int i_len) 338int udf_CS0toUTF8(uint8_t *utf_o, int o_len, const uint8_t *ocu_i, int i_len)
208{ 339{
209 return udf_name_from_CS0(utf_o, o_len, ocu_i, i_len, 340 return udf_name_from_CS0(utf_o, o_len, ocu_i, i_len,
210 udf_uni2char_utf8); 341 udf_uni2char_utf8, 0);
211} 342}
212 343
213int udf_get_filename(struct super_block *sb, const uint8_t *sname, int slen, 344int udf_get_filename(struct super_block *sb, const uint8_t *sname, int slen,
214 uint8_t *dname, int dlen) 345 uint8_t *dname, int dlen)
215{ 346{
216 uint8_t *filename;
217 int (*conv_f)(wchar_t, unsigned char *, int); 347 int (*conv_f)(wchar_t, unsigned char *, int);
218 int ret; 348 int ret;
219 349
@@ -223,10 +353,6 @@ int udf_get_filename(struct super_block *sb, const uint8_t *sname, int slen,
223 if (dlen <= 0) 353 if (dlen <= 0)
224 return 0; 354 return 0;
225 355
226 filename = kmalloc(dlen, GFP_NOFS);
227 if (!filename)
228 return -ENOMEM;
229
230 if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) { 356 if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
231 conv_f = udf_uni2char_utf8; 357 conv_f = udf_uni2char_utf8;
232 } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) { 358 } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
@@ -234,19 +360,10 @@ int udf_get_filename(struct super_block *sb, const uint8_t *sname, int slen,
234 } else 360 } else
235 BUG(); 361 BUG();
236 362
237 ret = udf_name_from_CS0(filename, dlen, sname, slen, conv_f); 363 ret = udf_name_from_CS0(dname, dlen, sname, slen, conv_f, 1);
238 if (ret < 0) {
239 udf_debug("Failed in udf_get_filename: sname = %s\n", sname);
240 goto out2;
241 }
242
243 ret = udf_translate_to_linux(dname, dlen, filename, dlen,
244 sname + 1, slen - 1);
245 /* Zero length filename isn't valid... */ 364 /* Zero length filename isn't valid... */
246 if (ret == 0) 365 if (ret == 0)
247 ret = -EINVAL; 366 ret = -EINVAL;
248out2:
249 kfree(filename);
250 return ret; 367 return ret;
251} 368}
252 369
@@ -265,96 +382,3 @@ int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen,
265 return udf_name_to_CS0(dname, dlen, sname, slen, conv_f); 382 return udf_name_to_CS0(dname, dlen, sname, slen, conv_f);
266} 383}
267 384
268#define ILLEGAL_CHAR_MARK '_'
269#define EXT_MARK '.'
270#define CRC_MARK '#'
271#define EXT_SIZE 5
272/* Number of chars we need to store generated CRC to make filename unique */
273#define CRC_LEN 5
274
275static int udf_translate_to_linux(uint8_t *newName, int newLen,
276 const uint8_t *udfName, int udfLen,
277 const uint8_t *fidName, int fidNameLen)
278{
279 int index, newIndex = 0, needsCRC = 0;
280 int extIndex = 0, newExtIndex = 0, hasExt = 0;
281 unsigned short valueCRC;
282 uint8_t curr;
283
284 if (udfName[0] == '.' &&
285 (udfLen == 1 || (udfLen == 2 && udfName[1] == '.'))) {
286 needsCRC = 1;
287 newIndex = udfLen;
288 memcpy(newName, udfName, udfLen);
289 } else {
290 for (index = 0; index < udfLen; index++) {
291 curr = udfName[index];
292 if (curr == '/' || curr == 0) {
293 needsCRC = 1;
294 curr = ILLEGAL_CHAR_MARK;
295 while (index + 1 < udfLen &&
296 (udfName[index + 1] == '/' ||
297 udfName[index + 1] == 0))
298 index++;
299 }
300 if (curr == EXT_MARK &&
301 (udfLen - index - 1) <= EXT_SIZE) {
302 if (udfLen == index + 1)
303 hasExt = 0;
304 else {
305 hasExt = 1;
306 extIndex = index;
307 newExtIndex = newIndex;
308 }
309 }
310 if (newIndex < newLen)
311 newName[newIndex++] = curr;
312 else
313 needsCRC = 1;
314 }
315 }
316 if (needsCRC) {
317 uint8_t ext[EXT_SIZE];
318 int localExtIndex = 0;
319
320 if (hasExt) {
321 int maxFilenameLen;
322 for (index = 0;
323 index < EXT_SIZE && extIndex + index + 1 < udfLen;
324 index++) {
325 curr = udfName[extIndex + index + 1];
326
327 if (curr == '/' || curr == 0) {
328 needsCRC = 1;
329 curr = ILLEGAL_CHAR_MARK;
330 while (extIndex + index + 2 < udfLen &&
331 (index + 1 < EXT_SIZE &&
332 (udfName[extIndex + index + 2] == '/' ||
333 udfName[extIndex + index + 2] == 0)))
334 index++;
335 }
336 ext[localExtIndex++] = curr;
337 }
338 maxFilenameLen = newLen - CRC_LEN - localExtIndex;
339 if (newIndex > maxFilenameLen)
340 newIndex = maxFilenameLen;
341 else
342 newIndex = newExtIndex;
343 } else if (newIndex > newLen - CRC_LEN)
344 newIndex = newLen - CRC_LEN;
345 newName[newIndex++] = CRC_MARK;
346 valueCRC = crc_itu_t(0, fidName, fidNameLen);
347 newName[newIndex++] = hex_asc_upper_hi(valueCRC >> 8);
348 newName[newIndex++] = hex_asc_upper_lo(valueCRC >> 8);
349 newName[newIndex++] = hex_asc_upper_hi(valueCRC);
350 newName[newIndex++] = hex_asc_upper_lo(valueCRC);
351
352 if (hasExt) {
353 newName[newIndex++] = EXT_MARK;
354 for (index = 0; index < localExtIndex; index++)
355 newName[newIndex++] = ext[index];
356 }
357 }
358
359 return newIndex;
360}