1 module zencoding.windows949;
2 
3 import std.range : isInputRange, ElementType, hasLength;
4 import std.traits : isScalarType;
5 import zencoding.windows949.table;
6 
/**
 * Allocates a new string that contains the converted utf-16 string
 * from the provided windows 949 encoded range.
 *
 * Decoding stops at the first zero element (NUL terminator) or at the end
 * of the range, whichever comes first. Elements below 0x80 are copied
 * through unchanged. An element in 0x81 .. 0xFE starts a double-byte
 * sequence; together with the following trail element (0x41 .. 0xFE) it is
 * looked up in `cp949_table` under the key `(lead << 8) | trail`.
 *
 * U+FFFD (replacement character) is emitted when:
 * $(UL
 *   $(LI a double-byte sequence has no entry in `cp949_table`;)
 *   $(LI a lead element is followed by something that cannot be a trail
 *        element (the offending element is then decoded on its own);)
 *   $(LI the input ends, or a NUL terminator is reached, while a lead
 *        element is still pending.)
 * )
 * Stand-alone elements that are neither ASCII nor a lead (0x80, 0xFF and
 * anything above) are skipped without producing output.
 *
 * Params:
 *   range = windows 949 encoded InputRange which contains scalar types
 *
 * Returns:
 *   A utf-16 string
 */
wstring fromWindows949(R)(R range) pure nothrow @safe
if (isInputRange!R && isScalarType!(ElementType!R) && hasLength!R)
{
    import std.range : empty, front, popFront;

    enum replacement = cast(wchar) 0xFFFD;

    wstring decoded;

    // Each input element yields at most one UTF-16 code unit, so the
    // element count of the input is an upper bound for the output.
    decoded.reserve(range.length);

    // Pending lead element of a double-byte sequence; 0 means "none".
    auto lead = 0;

    while (!range.empty)
    {
        const character = range.front;

        if (character == 0)
        {
            // NUL terminator: a pending lead is reported after the loop.
            break;
        }

        if (lead > 0)
        {
            if (character > 0x40 && character < 0xFF)
            {
                // Complete double-byte sequence.
                const index = cast(ushort) ((lead << 8) | character);
                auto codePoint = index in cp949_table;
                if (codePoint is null)
                {
                    decoded ~= replacement;
                }
                else
                {
                    decoded ~= cast(wchar) *codePoint;
                }
                lead = 0;
                range.popFront();
                continue;
            }

            // The pending lead was not followed by a trail element. Report
            // it and forget it, so it cannot be combined with a later,
            // unrelated element; then decode the current element by itself.
            decoded ~= replacement;
            lead = 0;
        }

        if (character > 0x80 && character < 0xFF)
        {
            lead = character;
        }
        else if (character < 0x80)
        {
            decoded ~= cast(wchar) character;
        }
        // Anything else (0x80, 0xFF, wider values) is skipped.

        range.popFront();
    }

    if (lead > 0)
    {
        // Input ended (or hit NUL) in the middle of a double-byte sequence.
        decoded ~= replacement;
    }

    return decoded;
}
73 
///
unittest
{
    import std.algorithm : equal;
    import std.string : representation;

    // "data\imf\" + Hangul file name + ".imf", encoded as windows 949.
    static immutable ubyte[] payload = [
        0x64, 0x61, 0x74, 0x61, 0x5C, 0x69, 0x6D, 0x66, 0x5C,
        0xB1, 0xB8, 0xC6, 0xE4, 0xC4, 0xDA, 0x5F,
        0xC5, 0xA9, 0xB7, 0xE7, 0xBC, 0xBC, 0xC0, 0xCC, 0xB4, 0xF5, 0x5F,
        0xB3, 0xB2, 0x2E, 0x69, 0x6D, 0x66,
    ];

    // The payload sits at the start of a 100 byte, zero padded buffer;
    // decoding is expected to stop at the first zero byte.
    ubyte[100] buffer = 0;
    buffer[0 .. payload.length] = payload[];

    // Expected UTF-16 code units for the payload above.
    static immutable ushort[] expected = [
        0x64, 0x61, 0x74, 0x61, 0x5C, 0x69, 0x6D, 0x66, 0x5C,
        0xAD6C, 0xD398, 0xCF54, 0x5F,
        0xD06C, 0xB8E8, 0xC138, 0xC774, 0xB354, 0x5F,
        0xB0A8, 0x2E, 0x69, 0x6D, 0x66,
    ];

    wstring decoded = fromWindows949(buffer[]);
    auto units = decoded.representation;

    assert(equal(units, expected));
}
104