Linux-2.6.12-rc2 (tag: v2.6.12-rc2)

Initial git repository build. I'm not bothering with the full history, even though we have it. We can create a separate "historical" git archive of that later if we want to, and in the meantime it's about 3.2GB when imported into git - space that would just make the early git days unnecessarily complicated, when we don't have a lot of good infrastructure for it. Let it rip!
author: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 18:20:36 -0400
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-04-16 18:20:36 -0400
commit: 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree: 0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/udf/unicode.c
1 files changed, 516 insertions, 0 deletions
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
new file mode 100644
index 000000000000..5a80efd8debc
--- /dev/null
+++ b/fs/udf/unicode.c
@@ -0,0 +1,516 @@
+/*
+ * unicode.c
+ *
+ * PURPOSE
+ *      Routines for converting between UTF-8 and OSTA Compressed Unicode.
+ *      Also handles filename mangling
+ *
+ * DESCRIPTION
+ *      OSTA Compressed Unicode is explained in the OSTA UDF specification.
+ *              http://www.osta.org/
+ *      UTF-8 is explained in IETF RFC 3629 (which obsoletes RFC 2279).
+ *              https://www.rfc-editor.org/rfc/rfc3629.txt
+ *
+ * CONTACTS
+ *      E-mail regarding any portion of the Linux UDF file system should be
+ *      directed to the development team's mailing list (run by majordomo):
+ *              linux_udf@hpesjro.fc.hp.com
+ *
+ * COPYRIGHT
+ *      This file is distributed under the terms of the GNU General Public
+ *      License (GPL). Copies of the GPL can be obtained from:
+ *              ftp://prep.ai.mit.edu/pub/gnu/GPL
+ *      Each contributing author retains all rights to their own work.
+ */
+#include "udfdecl.h"
+#include <linux/kernel.h>
+#include <linux/string.h>       /* for memset */
+#include <linux/nls.h>
+#include <linux/udf_fs.h>
+#include "udf_sb.h"
+static int udf_translate_to_linux(uint8_t *, uint8_t *, int, uint8_t *, int);
+/*
+ * udf_char_to_ustr
+ *
+ * Wrap a raw byte string in a struct ustr: the bytes are copied into
+ * dest->u_name, dest->u_cmpID is set to 0x08 and dest->u_len to strlen.
+ *
+ * Returns the number of bytes copied, or 0 if an argument is NULL or
+ * strlen is outside the range 1 .. UDF_NAME_LEN-2.
+ */
+static int udf_char_to_ustr(struct ustr *dest, const uint8_t *src, int strlen)
+{
+        /*
+         * Reject non-positive lengths explicitly: a negative value would
+         * slip past an upper-bound-only check and be converted to a huge
+         * size_t by memcpy().
+         */
+        if (!dest || !src || strlen <= 0 || strlen > UDF_NAME_LEN - 2)
+                return 0;
+        memset(dest, 0, sizeof(*dest));
+        memcpy(dest->u_name, src, strlen);
+        dest->u_cmpID = 0x08;
+        dest->u_len = strlen;
+        return strlen;
+}
+/*
+ * udf_build_ustr
+ *
+ * Build a struct ustr from a fixed-size dstring field of size bytes.
+ * ptr[0] is taken as the compression ID and ptr[size-1] as the recorded
+ * name length; the name bytes start at ptr[1].
+ *
+ * The recorded length is clamped to the bytes actually available between
+ * the first and last byte of the field and to the capacity of
+ * dest->u_name, so dest->u_len never describes more data than was copied.
+ *
+ * Returns 0 on success, or -1 if an argument is NULL or size is not
+ * positive.
+ */
+int udf_build_ustr(struct ustr *dest, dstring *ptr, int size)
+{
+        int avail, namelen;
+        /* size <= 0 would make ptr[size-1] an out-of-bounds read. */
+        if (!dest || !ptr || size <= 0)
+                return -1;
+        /* Bytes between the compression ID and the trailing length byte. */
+        avail = size - 2;
+        if (avail < 0)
+                avail = 0;
+        if (avail > (int)sizeof(dest->u_name))
+                avail = (int)sizeof(dest->u_name);
+        namelen = ptr[size - 1];
+        if (namelen > avail)
+                namelen = avail;
+        memset(dest, 0, sizeof(*dest));
+        dest->u_cmpID = ptr[0];
+        dest->u_len = namelen;
+        memcpy(dest->u_name, ptr + 1, namelen);
+        return 0;
+}
+/*
+ * udf_build_ustr_exact
+ *
+ * Build a struct ustr from a buffer of exactly exactsize bytes: ptr[0] is
+ * taken as the compression ID and the remaining exactsize-1 bytes as the
+ * name.
+ *
+ * Returns 0 on success, or -1 if an argument is NULL, exactsize is not
+ * positive, or the name would not fit in dest->u_name.
+ */
+static int udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize)
+{
+        int namelen;
+        if (!dest || !ptr || exactsize <= 0)
+                return -1;
+        namelen = exactsize - 1;
+        /* Refuse rather than overrun the fixed-size name array. */
+        if (namelen > (int)sizeof(dest->u_name))
+                return -1;
+        memset(dest, 0, sizeof(*dest));
+        dest->u_cmpID = ptr[0];
+        dest->u_len = namelen;
+        memcpy(dest->u_name, ptr + 1, namelen);
+        return 0;
+}
+/*
+ * udf_CS0toUTF8
+ *
+ * PURPOSE
+ *      Convert OSTA Compressed Unicode to the UTF-8 equivalent.
+ *
+ * PRE-CONDITIONS
+ *      utf_o                   Pointer to the UTF-8 output string.
+ *      ocu_i                   Pointer to the OSTA Compressed Unicode input
+ *                              string; its u_cmpID must be 8 or 16.
+ *                              both of type "struct ustr *"
+ *
+ * POST-CONDITIONS
+ *      <return>                Number of UTF-8 bytes stored in utf_o
+ *                              (also left in utf_o->u_len).  Zero if the
+ *                              input is empty or the compression ID is
+ *                              unknown.  Conversion stops early, keeping
+ *                              what was produced so far, when the next
+ *                              character would not fit in utf_o->u_name.
+ *
+ * HISTORY
+ *      November 12, 1997 - Andrew E. Mileski
+ *      Written, tested, and released.
+ */
+int udf_CS0toUTF8(struct ustr *utf_o, struct ustr *ocu_i)
+{
+        uint8_t *ocu;
+        uint32_t c;
+        uint8_t cmp_id, ocu_len;
+        int i, need;
+        ocu = ocu_i->u_name;
+        ocu_len = ocu_i->u_len;
+        cmp_id = ocu_i->u_cmpID;
+        utf_o->u_len = 0;
+        if (ocu_len == 0)
+        {
+                memset(utf_o, 0, sizeof(struct ustr));
+                utf_o->u_cmpID = 0;
+                utf_o->u_len = 0;
+                return 0;
+        }
+        if ((cmp_id != 8) && (cmp_id != 16))
+        {
+                printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", cmp_id, ocu_i->u_name);
+                return 0;
+        }
+        /* Never read past the input name array, whatever u_len claims. */
+        if (ocu_len > sizeof(ocu_i->u_name))
+                ocu_len = sizeof(ocu_i->u_name);
+        for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN-3)) ;)
+        {
+                /* Expand OSTA compressed Unicode to Unicode */
+                c = ocu[i++];
+                if (cmp_id == 16)
+                {
+                        /* Odd byte count: the last 16-bit unit is incomplete */
+                        if (i >= ocu_len)
+                                break;
+                        c = (c << 8) | ocu[i++];
+                }
+                /* Stop before a multi-byte sequence would overrun u_name */
+                need = (c < 0x80U) ? 1 : ((c < 0x800U) ? 2 : 3);
+                if (utf_o->u_len + need > (int)sizeof(utf_o->u_name))
+                        break;
+                /* Compress Unicode to UTF-8 */
+                if (c < 0x80U)
+                        utf_o->u_name[utf_o->u_len++] = (uint8_t)c;
+                else if (c < 0x800U)
+                {
+                        utf_o->u_name[utf_o->u_len++] = (uint8_t)(0xc0 | (c >> 6));
+                        utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 | (c & 0x3f));
+                }
+                else
+                {
+                        utf_o->u_name[utf_o->u_len++] = (uint8_t)(0xe0 | (c >> 12));
+                        utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 | ((c >> 6) & 0x3f));
+                        utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 | (c & 0x3f));
+                }
+        }
+        utf_o->u_cmpID=8;
+        return utf_o->u_len;
+}
+/*
+ *
+ * udf_UTF8toCS0
+ *
+ * PURPOSE
+ *      Convert UTF-8 to the OSTA Compressed Unicode equivalent.
+ *
+ * PRE-CONDITIONS
+ *      ocu                     Pointer to OSTA Compressed Unicode output
+ *                              buffer of size length bytes.
+ *      utf                     Pointer to UTF-8 input string.
+ *      length                  Size of the ocu buffer in bytes.
+ *
+ * POST-CONDITIONS
+ *      ocu[0]                  Compression ID: 8 while every character is
+ *                              <= 0xff, otherwise 16 (two bytes per
+ *                              character, high byte first).
+ *      ocu[length - 1]         Number of bytes used, including ocu[0].
+ *      <return>                Number of bytes used, including ocu[0].
+ *                              Zero if the converted name does not fit in
+ *                              the buffer.
+ *
+ *      A malformed lead byte, a character above 0xffff, or a sequence cut
+ *      off at the end of the input ends the conversion with a '?' appended.
+ *
+ * HISTORY
+ *      November 12, 1997 - Andrew E. Mileski
+ *      Written, tested, and released.
+ */
+static int udf_UTF8toCS0(dstring *ocu, struct ustr *utf, int length)
+{
+        unsigned c, i, max_val, utf_char;
+        int utf_cnt, u_len, u_ch;
+        /* Need room for at least the compression ID and the length byte */
+        if (length < 2)
+                return 0;
+        memset(ocu, 0, sizeof(dstring) * length);
+        ocu[0] = 8;
+        max_val = 0xffU;
+        u_ch = 1;       /* output bytes per character */
+try_again:
+        u_len = 0;
+        utf_char = 0U;
+        utf_cnt = 0;
+        for (i = 0U; i < utf->u_len; i++)
+        {
+                c = (uint8_t)utf->u_name[i];
+                /* Complete a multi-byte UTF-8 character */
+                if (utf_cnt)
+                {
+                        utf_char = (utf_char << 6) | (c & 0x3fU);
+                        if (--utf_cnt)
+                                continue;
+                }
+                else
+                {
+                        /* Check for a multi-byte UTF-8 character */
+                        if (c & 0x80U)
+                        {
+                                /* Start a multi-byte UTF-8 character */
+                                if ((c & 0xe0U) == 0xc0U)
+                                {
+                                        utf_char = c & 0x1fU;
+                                        utf_cnt = 1;
+                                }
+                                else if ((c & 0xf0U) == 0xe0U)
+                                {
+                                        utf_char = c & 0x0fU;
+                                        utf_cnt = 2;
+                                }
+                                else if ((c & 0xf8U) == 0xf0U)
+                                {
+                                        utf_char = c & 0x07U;
+                                        utf_cnt = 3;
+                                }
+                                else if ((c & 0xfcU) == 0xf8U)
+                                {
+                                        utf_char = c & 0x03U;
+                                        utf_cnt = 4;
+                                }
+                                else if ((c & 0xfeU) == 0xfcU)
+                                {
+                                        utf_char = c & 0x01U;
+                                        utf_cnt = 5;
+                                }
+                                else
+                                        goto error_out;
+                                continue;
+                        } else
+                                /* Single byte UTF-8 character (most common) */
+                                utf_char = c;
+                }
+                /* Choose no compression if necessary */
+                if (utf_char > max_val)
+                {
+                        if ( 0xffU == max_val )
+                        {
+                                /* Restart from scratch with 16-bit characters */
+                                max_val = 0xffffU;
+                                u_ch = 2;
+                                ocu[0] = (uint8_t)0x10U;
+                                goto try_again;
+                        }
+                        goto error_out;
+                }
+                /*
+                 * Name didn't fit?  Payload may only occupy ocu[1] up to
+                 * ocu[length - 2]; ocu[length - 1] holds the length.
+                 */
+                if (u_len + u_ch > length - 2)
+                        return 0;
+                if (u_ch == 2)
+                {
+                        ocu[++u_len] = (uint8_t)(utf_char >> 8);
+                }
+                ocu[++u_len] = (uint8_t)(utf_char & 0xffU);
+        }
+        if (utf_cnt)
+        {
+error_out:
+                if (u_len + u_ch > length - 2)
+                        return 0;
+                /* Keep the payload a whole number of 16-bit characters */
+                if (u_ch == 2)
+                        ocu[++u_len] = 0;
+                ocu[++u_len] = '?';
+                printk(KERN_DEBUG "udf: bad UTF-8 character\n");
+        }
+        ocu[length - 1] = (uint8_t)u_len + 1;
+        return u_len + 1;
+}
+/*
+ * udf_CS0toNLS
+ *
+ * Convert OSTA Compressed Unicode to the character set of an NLS table,
+ * one character at a time through nls->uni2char().
+ *
+ *      nls                     NLS table providing uni2char().
+ *      utf_o                   Output string.
+ *      ocu_i                   Input string; its u_cmpID must be 8 or 16.
+ *
+ * Returns the number of bytes stored in utf_o (also left in
+ * utf_o->u_len), or zero if the input is empty or the compression ID is
+ * unknown.  A character that uni2char() rejects is replaced by '?'.
+ */
+static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o, struct ustr *ocu_i)
+{
+        uint8_t *ocu;
+        uint32_t c;
+        uint8_t cmp_id, ocu_len;
+        int i, len, space;
+        ocu = ocu_i->u_name;
+        ocu_len = ocu_i->u_len;
+        cmp_id = ocu_i->u_cmpID;
+        utf_o->u_len = 0;
+        if (ocu_len == 0)
+        {
+                memset(utf_o, 0, sizeof(struct ustr));
+                utf_o->u_cmpID = 0;
+                utf_o->u_len = 0;
+                return 0;
+        }
+        if ((cmp_id != 8) && (cmp_id != 16))
+        {
+                printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", cmp_id, ocu_i->u_name);
+                return 0;
+        }
+        /* Never read past the input name array, whatever u_len claims. */
+        if (ocu_len > sizeof(ocu_i->u_name))
+                ocu_len = sizeof(ocu_i->u_name);
+        for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN-3)) ;)
+        {
+                /* Expand OSTA compressed Unicode to Unicode */
+                c = ocu[i++];
+                if (cmp_id == 16)
+                {
+                        /* Odd byte count: the last 16-bit unit is incomplete */
+                        if (i >= ocu_len)
+                                break;
+                        c = (c << 8) | ocu[i++];
+                }
+                /* Tell uni2char() how much room is really left in u_name */
+                space = (int)sizeof(utf_o->u_name) - utf_o->u_len;
+                if (space <= 0)
+                        break;
+                len = nls->uni2char(c, &utf_o->u_name[utf_o->u_len], space);
+                /* A negative return is an error code, not a byte count */
+                if (len >= 0)
+                        utf_o->u_len += len;
+                else
+                        utf_o->u_name[utf_o->u_len++] = '?';
+        }
+        utf_o->u_cmpID=8;
+        return utf_o->u_len;
+}
+/*
+ * udf_NLStoCS0
+ *
+ * Convert a string in the character set of an NLS table to OSTA
+ * Compressed Unicode, one character at a time through nls->char2uni().
+ *
+ *      nls                     NLS table providing char2uni().
+ *      ocu                     Output buffer of size length bytes.
+ *      uni                     Input string.
+ *      length                  Size of the ocu buffer in bytes.
+ *
+ * On return ocu[0] holds the compression ID (8 while every character is
+ * <= 0xff, otherwise 16 with two bytes per character, high byte first)
+ * and ocu[length - 1] the number of bytes used including ocu[0].
+ *
+ * Returns the number of bytes used including ocu[0], or zero if the
+ * converted name does not fit in the buffer.  Input that char2uni()
+ * rejects is consumed one byte at a time and replaced by '?'.
+ */
+static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni, int length)
+{
+        unsigned i, max_val;
+        uint16_t uni_char;
+        int len, u_len, u_ch;
+        /* Need room for at least the compression ID and the length byte */
+        if (length < 2)
+                return 0;
+        memset(ocu, 0, sizeof(dstring) * length);
+        ocu[0] = 8;
+        max_val = 0xffU;
+        u_ch = 1;       /* output bytes per character */
+try_again:
+        u_len = 0;
+        for (i = 0U; i < uni->u_len; i++)
+        {
+                /* len must be signed: char2uni() reports errors as negatives */
+                len = nls->char2uni(&uni->u_name[i], uni->u_len-i, &uni_char);
+                if (!len)
+                        continue;
+                if (len < 0)
+                {
+                        /* Invalid input: substitute and skip a single byte */
+                        len = 1;
+                        uni_char = '?';
+                }
+                if (uni_char > max_val)
+                {
+                        /* Restart from scratch with 16-bit characters */
+                        max_val = 0xffffU;
+                        u_ch = 2;
+                        ocu[0] = (uint8_t)0x10U;
+                        goto try_again;
+                }
+                /*
+                 * Name didn't fit?  Payload may only occupy ocu[1] up to
+                 * ocu[length - 2]; ocu[length - 1] holds the length.
+                 */
+                if (u_len + u_ch > length - 2)
+                        return 0;
+                if (u_ch == 2)
+                        ocu[++u_len] = (uint8_t)(uni_char >> 8);
+                ocu[++u_len] = (uint8_t)(uni_char & 0xffU);
+                /* The loop's i++ accounts for the first consumed byte */
+                i += len - 1;
+        }
+        ocu[length - 1] = (uint8_t)u_len + 1;
+        return u_len + 1;
+}
+/*
+ * udf_get_filename
+ *
+ * Convert the flen-byte on-disk name at sname into dname.  The name is
+ * decoded with udf_CS0toUTF8() when UDF_FLAG_UTF8 is set on sb, else with
+ * udf_CS0toNLS() when UDF_FLAG_NLS_MAP is set, and the decoded name is
+ * then passed through udf_translate_to_linux().
+ *
+ * Returns the value of udf_translate_to_linux(), or 0 if the name could
+ * not be built or decoded, or if neither flag is set.
+ */
+int udf_get_filename(struct super_block *sb, uint8_t *sname, uint8_t *dname, int flen)
+{
+        struct ustr decoded, raw;
+        if (udf_build_ustr_exact(&raw, sname, flen) != 0)
+                return 0;
+        if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8))
+        {
+                if (udf_CS0toUTF8(&decoded, &raw) == 0)
+                {
+                        udf_debug("Failed in udf_get_filename: sname = %s\n", sname);
+                        return 0;
+                }
+        }
+        else
+        {
+                if (!UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
+                        return 0;
+                if (udf_CS0toNLS(UDF_SB(sb)->s_nls_map, &decoded, &raw) == 0)
+                {
+                        udf_debug("Failed in udf_get_filename: sname = %s\n", sname);
+                        return 0;
+                }
+        }
+        /* The raw name is passed along as well: it is the CRC input */
+        return udf_translate_to_linux(dname, decoded.u_name, decoded.u_len,
+                raw.u_name, raw.u_len);
+}
+/*
+ * udf_put_filename
+ *
+ * Encode the flen-byte name at sname into the buffer dname, which is
+ * treated as UDF_NAME_LEN bytes long.  The name is encoded with
+ * udf_UTF8toCS0() when UDF_FLAG_UTF8 is set on sb, else with
+ * udf_NLStoCS0() when UDF_FLAG_NLS_MAP is set.
+ *
+ * Returns the encoder's return value, or 0 if sname could not be wrapped
+ * by udf_char_to_ustr() or if neither flag is set.
+ */
+int udf_put_filename(struct super_block *sb, const uint8_t *sname, uint8_t *dname, int flen)
+{
+        struct ustr wrapped;
+        if (udf_char_to_ustr(&wrapped, sname, flen) == 0)
+                return 0;
+        if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8))
+                return udf_UTF8toCS0(dname, &wrapped, UDF_NAME_LEN);
+        if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
+                return udf_NLStoCS0(UDF_SB(sb)->s_nls_map, dname, &wrapped, UDF_NAME_LEN);
+        return 0;
+}
+#define ILLEGAL_CHAR_MARK       '_'
+#define EXT_MARK                        '.'
+#define CRC_MARK                        '#'
+#define EXT_SIZE                        5
+/*
+ * udf_translate_to_linux
+ *
+ * Copy the udfLen-byte name udfName into newName, replacing every run of
+ * '/' or NUL bytes by a single ILLEGAL_CHAR_MARK and keeping at most 256
+ * bytes.
+ *
+ * If anything was replaced or dropped, or if the name is "." or "..", the
+ * result is tagged with CRC_MARK followed by four upper-case hex digits of
+ * udf_crc(fidName, fidNameLen, 0).  An extension (a '.' followed by 1 to
+ * EXT_SIZE characters at the end of the name) is re-appended after the
+ * tag, and the part before the tag is cut so the base name ends no later
+ * than offset 250.
+ *
+ * Returns the number of bytes stored in newName.
+ */
+static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName, int udfLen, uint8_t *fidName, int fidNameLen)
+{
+        static const uint8_t hex_digits[] = "0123456789ABCDEF";
+        uint8_t ext[EXT_SIZE];
+        uint8_t ch;
+        unsigned short crc;
+        int src, shift;
+        int out = 0, ext_len = 0;
+        int ext_src = 0, ext_out = 0;
+        int has_ext = 0, mangle = 0;
+        int is_dot = (udfLen == 1 && udfName[0] == '.');
+        int is_dotdot = (udfLen == 2 && udfName[0] == '.' && udfName[1] == '.');
+        if (is_dot || is_dotdot)
+        {
+                /* "." and ".." are copied verbatim but always get the tag */
+                memcpy(newName, udfName, udfLen);
+                out = udfLen;
+                mangle = 1;
+        }
+        else
+        {
+                for (src = 0; src < udfLen; src++)
+                {
+                        ch = udfName[src];
+                        if (ch == '/' || ch == 0)
+                        {
+                                /* Swallow the rest of the run of illegal bytes */
+                                while (src + 1 < udfLen &&
+                                        (udfName[src + 1] == '/' || udfName[src + 1] == 0))
+                                        src++;
+                                ch = ILLEGAL_CHAR_MARK;
+                                mangle = 1;
+                        }
+                        if (ch == EXT_MARK && (udfLen - src - 1) <= EXT_SIZE)
+                        {
+                                /* A trailing '.' cancels any extension seen so far */
+                                has_ext = (src + 1 != udfLen);
+                                if (has_ext)
+                                {
+                                        ext_src = src;
+                                        ext_out = out;
+                                }
+                        }
+                        if (out >= 256)
+                        {
+                                /* Output full: drop the byte, force the tag */
+                                mangle = 1;
+                                continue;
+                        }
+                        newName[out++] = ch;
+                }
+        }
+        if (!mangle)
+                return out;
+        if (has_ext)
+        {
+                /* Re-scan the extension from the source, sanitising it again */
+                for (src = 0; src < EXT_SIZE && ext_src + src + 1 < udfLen; src++)
+                {
+                        ch = udfName[ext_src + src + 1];
+                        if (ch == '/' || ch == 0)
+                        {
+                                while (ext_src + src + 2 < udfLen && src + 1 < EXT_SIZE &&
+                                        (udfName[ext_src + src + 2] == '/' ||
+                                                udfName[ext_src + src + 2] == 0))
+                                        src++;
+                                ch = ILLEGAL_CHAR_MARK;
+                        }
+                        ext[ext_len++] = ch;
+                }
+                /* Either truncate the over-long name or cut at the '.' */
+                out = (out > 250 - ext_len) ? 250 - ext_len : ext_out;
+        }
+        else if (out > 250)
+                out = 250;
+        newName[out++] = CRC_MARK;
+        crc = udf_crc(fidName, fidNameLen, 0);
+        /* Four hex digits, most significant nibble first */
+        for (shift = 12; shift >= 0; shift -= 4)
+                newName[out++] = hex_digits[(crc >> shift) & 0xf];
+        if (has_ext)
+        {
+                newName[out++] = EXT_MARK;
+                memcpy(newName + out, ext, ext_len);
+                out += ext_len;
+        }
+        return out;
+}
author	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 18:20:36 -0400
committer	Linus Torvalds <torvalds@ppc970.osdl.org>	2005-04-16 18:20:36 -0400
commit	1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch)
tree	0bba044c4ce775e45a88a51686b5d9f90697ea9d /fs/udf/unicode.c

diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c new file mode 100644 index 000000000000..5a80efd8debc --- /dev/null +++ b/fs/udf/unicode.c
@@ -0,0 +1,516 @@
	1	/*
	2	* unicode.c
	3	*
	4	* PURPOSE
	5	* Routines for converting between UTF-8 and OSTA Compressed Unicode.
	6	* Also handles filename mangling
	7	*
	8	* DESCRIPTION
	9	* OSTA Compressed Unicode is explained in the OSTA UDF specification.
	10	* http://www.osta.org/
	11	* UTF-8 is explained in the IETF RFC XXXX.
	12	* ftp://ftp.internic.net/rfc/rfcxxxx.txt
	13	*
	14	* CONTACTS
	15	* E-mail regarding any portion of the Linux UDF file system should be
	16	* directed to the development team's mailing list (run by majordomo):
	17	* linux_udf@hpesjro.fc.hp.com
	18	*
	19	* COPYRIGHT
	20	* This file is distributed under the terms of the GNU General Public
	21	* License (GPL). Copies of the GPL can be obtained from:
	22	* ftp://prep.ai.mit.edu/pub/gnu/GPL
	23	* Each contributing author retains all rights to their own work.
	24	*/
	25
	26	#include "udfdecl.h"
	27
	28	#include <linux/kernel.h>
	29	#include <linux/string.h> /* for memset */
	30	#include <linux/nls.h>
	31	#include <linux/udf_fs.h>
	32
	33	#include "udf_sb.h"
	34
	35	static int udf_translate_to_linux(uint8_t , uint8_t , int, uint8_t *, int);
	36
	37	static int udf_char_to_ustr(struct ustr dest, const uint8_t src, int strlen)
	38	{
	39	if ( (!dest) \|\| (!src) \|\| (!strlen) \|\| (strlen > UDF_NAME_LEN-2) )
	40	return 0;
	41	memset(dest, 0, sizeof(struct ustr));
	42	memcpy(dest->u_name, src, strlen);
	43	dest->u_cmpID = 0x08;
	44	dest->u_len = strlen;
	45	return strlen;
	46	}
	47
	48	/*
	49	* udf_build_ustr
	50	*/
	51	int udf_build_ustr(struct ustr dest, dstring ptr, int size)
	52	{
	53	int usesize;
	54
	55	if ( (!dest) \|\| (!ptr) \|\| (!size) )
	56	return -1;
	57
	58	memset(dest, 0, sizeof(struct ustr));
	59	usesize= (size > UDF_NAME_LEN) ? UDF_NAME_LEN : size;
	60	dest->u_cmpID=ptr[0];
	61	dest->u_len=ptr[size-1];
	62	memcpy(dest->u_name, ptr+1, usesize-1);
	63	return 0;
	64	}
	65
	66	/*
	67	* udf_build_ustr_exact
	68	*/
	69	static int udf_build_ustr_exact(struct ustr dest, dstring ptr, int exactsize)
	70	{
	71	if ( (!dest) \|\| (!ptr) \|\| (!exactsize) )
	72	return -1;
	73
	74	memset(dest, 0, sizeof(struct ustr));
	75	dest->u_cmpID=ptr[0];
	76	dest->u_len=exactsize-1;
	77	memcpy(dest->u_name, ptr+1, exactsize-1);
	78	return 0;
	79	}
	80
	81	/*
	82	* udf_ocu_to_utf8
	83	*
	84	* PURPOSE
	85	* Convert OSTA Compressed Unicode to the UTF-8 equivalent.
	86	*
	87	* DESCRIPTION
	88	* This routine is only called by udf_filldir().
	89	*
	90	* PRE-CONDITIONS
	91	* utf Pointer to UTF-8 output buffer.
	92	* ocu Pointer to OSTA Compressed Unicode input buffer
	93	* of size UDF_NAME_LEN bytes.
	94	* both of type "struct ustr *"
	95	*
	96	* POST-CONDITIONS
	97	* <return> Zero on success.
	98	*
	99	* HISTORY
	100	* November 12, 1997 - Andrew E. Mileski
	101	* Written, tested, and released.
	102	*/
	103	int udf_CS0toUTF8(struct ustr utf_o, struct ustr ocu_i)
	104	{
	105	uint8_t *ocu;
	106	uint32_t c;
	107	uint8_t cmp_id, ocu_len;
	108	int i;
	109
	110	ocu = ocu_i->u_name;
	111
	112	ocu_len = ocu_i->u_len;
	113	cmp_id = ocu_i->u_cmpID;
	114	utf_o->u_len = 0;
	115
	116	if (ocu_len == 0)
	117	{
	118	memset(utf_o, 0, sizeof(struct ustr));
	119	utf_o->u_cmpID = 0;
	120	utf_o->u_len = 0;
	121	return 0;
	122	}
	123
	124	if ((cmp_id != 8) && (cmp_id != 16))
	125	{
	126	printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", cmp_id, ocu_i->u_name);
	127	return 0;
	128	}
	129
	130	for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN-3)) ;)
	131	{
	132
	133	/* Expand OSTA compressed Unicode to Unicode */
	134	c = ocu[i++];
	135	if (cmp_id == 16)
	136	c = (c << 8) \| ocu[i++];
	137
	138	/* Compress Unicode to UTF-8 */
	139	if (c < 0x80U)
	140	utf_o->u_name[utf_o->u_len++] = (uint8_t)c;
	141	else if (c < 0x800U)
	142	{
	143	utf_o->u_name[utf_o->u_len++] = (uint8_t)(0xc0 \| (c >> 6));
	144	utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 \| (c & 0x3f));
	145	}
	146	else
	147	{
	148	utf_o->u_name[utf_o->u_len++] = (uint8_t)(0xe0 \| (c >> 12));
	149	utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 \| ((c >> 6) & 0x3f));
	150	utf_o->u_name[utf_o->u_len++] = (uint8_t)(0x80 \| (c & 0x3f));
	151	}
	152	}
	153	utf_o->u_cmpID=8;
	154
	155	return utf_o->u_len;
	156	}
	157
	158	/*
	159	*
	160	* udf_utf8_to_ocu
	161	*
	162	* PURPOSE
	163	* Convert UTF-8 to the OSTA Compressed Unicode equivalent.
	164	*
	165	* DESCRIPTION
	166	* This routine is only called by udf_lookup().
	167	*
	168	* PRE-CONDITIONS
	169	* ocu Pointer to OSTA Compressed Unicode output
	170	* buffer of size UDF_NAME_LEN bytes.
	171	* utf Pointer to UTF-8 input buffer.
	172	* utf_len Length of UTF-8 input buffer in bytes.
	173	*
	174	* POST-CONDITIONS
	175	* <return> Zero on success.
	176	*
	177	* HISTORY
	178	* November 12, 1997 - Andrew E. Mileski
	179	* Written, tested, and released.
	180	*/
	181	static int udf_UTF8toCS0(dstring ocu, struct ustr utf, int length)
	182	{
	183	unsigned c, i, max_val, utf_char;
	184	int utf_cnt, u_len;
	185
	186	memset(ocu, 0, sizeof(dstring) * length);
	187	ocu[0] = 8;
	188	max_val = 0xffU;
	189
	190	try_again:
	191	u_len = 0U;
	192	utf_char = 0U;
	193	utf_cnt = 0U;
	194	for (i = 0U; i < utf->u_len; i++)
	195	{
	196	c = (uint8_t)utf->u_name[i];
	197
	198	/* Complete a multi-byte UTF-8 character */
	199	if (utf_cnt)
	200	{
	201	utf_char = (utf_char << 6) \| (c & 0x3fU);
	202	if (--utf_cnt)
	203	continue;
	204	}
	205	else
	206	{
	207	/* Check for a multi-byte UTF-8 character */
	208	if (c & 0x80U)
	209	{
	210	/* Start a multi-byte UTF-8 character */
	211	if ((c & 0xe0U) == 0xc0U)
	212	{
	213	utf_char = c & 0x1fU;
	214	utf_cnt = 1;
	215	}
	216	else if ((c & 0xf0U) == 0xe0U)
	217	{
	218	utf_char = c & 0x0fU;
	219	utf_cnt = 2;
	220	}
	221	else if ((c & 0xf8U) == 0xf0U)
	222	{
	223	utf_char = c & 0x07U;
	224	utf_cnt = 3;
	225	}
	226	else if ((c & 0xfcU) == 0xf8U)
	227	{
	228	utf_char = c & 0x03U;
	229	utf_cnt = 4;
	230	}
	231	else if ((c & 0xfeU) == 0xfcU)
	232	{
	233	utf_char = c & 0x01U;
	234	utf_cnt = 5;
	235	}
	236	else
	237	goto error_out;
	238	continue;
	239	} else
	240	/* Single byte UTF-8 character (most common) */
	241	utf_char = c;
	242	}
	243
	244	/* Choose no compression if necessary */
	245	if (utf_char > max_val)
	246	{
	247	if ( 0xffU == max_val )
	248	{
	249	max_val = 0xffffU;
	250	ocu[0] = (uint8_t)0x10U;
	251	goto try_again;
	252	}
	253	goto error_out;
	254	}
	255
	256	if (max_val == 0xffffU)
	257	{
	258	ocu[++u_len] = (uint8_t)(utf_char >> 8);
	259	}
	260	ocu[++u_len] = (uint8_t)(utf_char & 0xffU);
	261	}
	262
	263
	264	if (utf_cnt)
	265	{
	266	error_out:
	267	ocu[++u_len] = '?';
	268	printk(KERN_DEBUG "udf: bad UTF-8 character\n");
	269	}
	270
	271	ocu[length - 1] = (uint8_t)u_len + 1;
	272	return u_len + 1;
	273	}
	274
	275	static int udf_CS0toNLS(struct nls_table nls, struct ustr utf_o, struct ustr *ocu_i)
	276	{
	277	uint8_t *ocu;
	278	uint32_t c;
	279	uint8_t cmp_id, ocu_len;
	280	int i;
	281
	282	ocu = ocu_i->u_name;
	283
	284	ocu_len = ocu_i->u_len;
	285	cmp_id = ocu_i->u_cmpID;
	286	utf_o->u_len = 0;
	287
	288	if (ocu_len == 0)
	289	{
	290	memset(utf_o, 0, sizeof(struct ustr));
	291	utf_o->u_cmpID = 0;
	292	utf_o->u_len = 0;
	293	return 0;
	294	}
	295
	296	if ((cmp_id != 8) && (cmp_id != 16))
	297	{
	298	printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n", cmp_id, ocu_i->u_name);
	299	return 0;
	300	}
	301
	302	for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN-3)) ;)
	303	{
	304	/* Expand OSTA compressed Unicode to Unicode */
	305	c = ocu[i++];
	306	if (cmp_id == 16)
	307	c = (c << 8) \| ocu[i++];
	308
	309	utf_o->u_len += nls->uni2char(c, &utf_o->u_name[utf_o->u_len],
	310	UDF_NAME_LEN - utf_o->u_len);
	311	}
	312	utf_o->u_cmpID=8;
	313
	314	return utf_o->u_len;
	315	}
	316
	317	static int udf_NLStoCS0(struct nls_table nls, dstring ocu, struct ustr *uni, int length)
	318	{
	319	unsigned len, i, max_val;
	320	uint16_t uni_char;
	321	int u_len;
	322
	323	memset(ocu, 0, sizeof(dstring) * length);
	324	ocu[0] = 8;
	325	max_val = 0xffU;
	326
	327	try_again:
	328	u_len = 0U;
	329	for (i = 0U; i < uni->u_len; i++)
	330	{
	331	len = nls->char2uni(&uni->u_name[i], uni->u_len-i, &uni_char);
	332	if (len <= 0)
	333	continue;
	334
	335	if (uni_char > max_val)
	336	{
	337	max_val = 0xffffU;
	338	ocu[0] = (uint8_t)0x10U;
	339	goto try_again;
	340	}
	341
	342	if (max_val == 0xffffU)
	343	ocu[++u_len] = (uint8_t)(uni_char >> 8);
	344	ocu[++u_len] = (uint8_t)(uni_char & 0xffU);
	345	i += len - 1;
	346	}
	347
	348	ocu[length - 1] = (uint8_t)u_len + 1;
	349	return u_len + 1;
	350	}
	351
	352	int udf_get_filename(struct super_block sb, uint8_t sname, uint8_t *dname, int flen)
	353	{
	354	struct ustr filename, unifilename;
	355	int len;
	356
	357	if (udf_build_ustr_exact(&unifilename, sname, flen))
	358	{
	359	return 0;
	360	}
	361
	362	if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8))
	363	{
	364	if (!udf_CS0toUTF8(&filename, &unifilename) )
	365	{
	366	udf_debug("Failed in udf_get_filename: sname = %s\n", sname);
	367	return 0;
	368	}
	369	}
	370	else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
	371	{
	372	if (!udf_CS0toNLS(UDF_SB(sb)->s_nls_map, &filename, &unifilename) )
	373	{
	374	udf_debug("Failed in udf_get_filename: sname = %s\n", sname);
	375	return 0;
	376	}
	377	}
	378	else
	379	return 0;
	380
	381	if ((len = udf_translate_to_linux(dname, filename.u_name, filename.u_len,
	382	unifilename.u_name, unifilename.u_len)))
	383	{
	384	return len;
	385	}
	386	return 0;
	387	}
	388
	389	int udf_put_filename(struct super_block sb, const uint8_t sname, uint8_t *dname, int flen)
	390	{
	391	struct ustr unifilename;
	392	int namelen;
	393
	394	if ( !(udf_char_to_ustr(&unifilename, sname, flen)) )
	395	{
	396	return 0;
	397	}
	398
	399	if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8))
	400	{
	401	if ( !(namelen = udf_UTF8toCS0(dname, &unifilename, UDF_NAME_LEN)) )
	402	{
	403	return 0;
	404	}
	405	}
	406	else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP))
	407	{
	408	if ( !(namelen = udf_NLStoCS0(UDF_SB(sb)->s_nls_map, dname, &unifilename, UDF_NAME_LEN)) )
	409	{
	410	return 0;
	411	}
	412	}
	413	else
	414	return 0;
	415
	416	return namelen;
	417	}
	418
	419	#define ILLEGAL_CHAR_MARK '_'
	420	#define EXT_MARK '.'
	421	#define CRC_MARK '#'
	422	#define EXT_SIZE 5
	423
	424	static int udf_translate_to_linux(uint8_t newName, uint8_t udfName, int udfLen, uint8_t *fidName, int fidNameLen)
	425	{
	426	int index, newIndex = 0, needsCRC = 0;
	427	int extIndex = 0, newExtIndex = 0, hasExt = 0;
	428	unsigned short valueCRC;
	429	uint8_t curr;
	430	const uint8_t hexChar[] = "0123456789ABCDEF";
	431
	432	if (udfName[0] == '.' && (udfLen == 1 \|\|
	433	(udfLen == 2 && udfName[1] == '.')))
	434	{
	435	needsCRC = 1;
	436	newIndex = udfLen;
	437	memcpy(newName, udfName, udfLen);
	438	}
	439	else
	440	{
	441	for (index = 0; index < udfLen; index++)
	442	{
	443	curr = udfName[index];
	444	if (curr == '/' \|\| curr == 0)
	445	{
	446	needsCRC = 1;
	447	curr = ILLEGAL_CHAR_MARK;
	448	while (index+1 < udfLen && (udfName[index+1] == '/' \|\|
	449	udfName[index+1] == 0))
	450	index++;
	451	}
	452	if (curr == EXT_MARK && (udfLen - index - 1) <= EXT_SIZE)
	453	{
	454	if (udfLen == index + 1)
	455	hasExt = 0;
	456	else
	457	{
	458	hasExt = 1;
	459	extIndex = index;
	460	newExtIndex = newIndex;
	461	}
	462	}
	463	if (newIndex < 256)
	464	newName[newIndex++] = curr;
	465	else
	466	needsCRC = 1;
	467	}
	468	}
	469	if (needsCRC)
	470	{
	471	uint8_t ext[EXT_SIZE];
	472	int localExtIndex = 0;
	473
	474	if (hasExt)
	475	{
	476	int maxFilenameLen;
	477	for(index = 0; index<EXT_SIZE && extIndex + index +1 < udfLen;
	478	index++ )
	479	{
	480	curr = udfName[extIndex + index + 1];
	481
	482	if (curr == '/' \|\| curr == 0)
	483	{
	484	needsCRC = 1;
	485	curr = ILLEGAL_CHAR_MARK;
	486	while(extIndex + index + 2 < udfLen && (index + 1 < EXT_SIZE
	487	&& (udfName[extIndex + index + 2] == '/' \|\|
	488	udfName[extIndex + index + 2] == 0)))
	489	index++;
	490	}
	491	ext[localExtIndex++] = curr;
	492	}
	493	maxFilenameLen = 250 - localExtIndex;
	494	if (newIndex > maxFilenameLen)
	495	newIndex = maxFilenameLen;
	496	else
	497	newIndex = newExtIndex;
	498	}
	499	else if (newIndex > 250)
	500	newIndex = 250;
	501	newName[newIndex++] = CRC_MARK;
	502	valueCRC = udf_crc(fidName, fidNameLen, 0);
	503	newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12];
	504	newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8];
	505	newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4];
	506	newName[newIndex++] = hexChar[(valueCRC & 0x000f)];
	507
	508	if (hasExt)
	509	{
	510	newName[newIndex++] = EXT_MARK;
	511	for (index = 0;index < localExtIndex ;index++ )
	512	newName[newIndex++] = ext[index];
	513	}
	514	}
	515	return newIndex;
	516	}