FifeGUI 0.2.0
A C++ GUI library designed for games.
core.hpp
1// Copyright 2006 Nemanja Trifunovic
2
3/*
4Permission is hereby granted, free of charge, to any person or organization
5obtaining a copy of the software and accompanying documentation covered by
6this license (the "Software") to use, reproduce, display, distribute,
7execute, and transmit the Software, and to prepare derivative works of the
8Software, and to permit third-parties to whom the Software is furnished to
9do so, all subject to the following:
10
11The copyright notices in the Software and this entire statement, including
12the above license grant, this restriction and the following disclaimer,
13must be included in all copies of the Software, in whole or in part, and
14all derivative works of the Software, unless such copies or derivative
15works are solely in the form of machine-executable object code generated by
16a source language processor.
17
18THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
21SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
22FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
23ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24DEALINGS IN THE SOFTWARE.
25*/
26
27#ifndef UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
28#define UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731
29
30#include <cstdint>
31#include <iterator>
32
namespace utf8
{
    // Fixed-width unsigned integer aliases used throughout the library.
    // They deliberately carry the same names as the ones from <cstdint> (or
    // boost/cstdint) but live inside namespace utf8. Change them if these
    // fundamental types do not have the expected widths on your system.
    using uint8_t  = unsigned char;
    using uint16_t = unsigned short;
    using uint32_t = unsigned int;

    // Helper code - not intended to be directly called by the library users. May be changed at any time
    namespace internal
    {
        // Unicode constants
        // Leading (high) surrogates: 0xd800 - 0xdbff
        // Trailing (low) surrogates: 0xdc00 - 0xdfff
        uint16_t const LEAD_SURROGATE_MIN  = 0xd800U;
        uint16_t const LEAD_SURROGATE_MAX  = 0xdbffU;
        uint16_t const TRAIL_SURROGATE_MIN = 0xdc00U;
        uint16_t const TRAIL_SURROGATE_MAX = 0xdfffU;
        uint16_t const LEAD_OFFSET         = LEAD_SURROGATE_MIN - (0x10000 >> 10);
        // Relies on unsigned wrap-around: the subtraction is carried out modulo 2^32.
        uint32_t const SURROGATE_OFFSET = 0x10000U - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN;

        // Maximum valid value for a Unicode code point
        uint32_t const CODE_POINT_MAX = 0x0010ffffU;

        /// Returns the low 8 bits of `oc` as an unsigned octet.
        /// Masking keeps a negative (signed char) value from leaking sign bits.
        template <typename octet_type>
        inline uint8_t mask8(octet_type oc)
        {
            return static_cast<uint8_t>(0xff & oc);
        }

        /// Returns the low 16 bits of `oc` as an unsigned 16-bit value.
        template <typename u16_type>
        inline uint16_t mask16(u16_type oc)
        {
            return static_cast<uint16_t>(0xffff & oc);
        }

        /// True when `oc` has the bit pattern 10xxxxxx, i.e. is a UTF-8 trail octet.
        template <typename octet_type>
        inline bool is_trail(octet_type oc)
        {
            return ((utf8::internal::mask8(oc) >> 6) == 0x2);
        }

        /// True when `cp` lies in [LEAD_SURROGATE_MIN, LEAD_SURROGATE_MAX].
        template <typename u16>
        inline bool is_lead_surrogate(u16 cp)
        {
            return (cp >= LEAD_SURROGATE_MIN && cp <= LEAD_SURROGATE_MAX);
        }

        /// True when `cp` lies in [TRAIL_SURROGATE_MIN, TRAIL_SURROGATE_MAX].
        template <typename u16>
        inline bool is_trail_surrogate(u16 cp)
        {
            return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
        }

        /// True when `cp` lies anywhere in the surrogate range
        /// [LEAD_SURROGATE_MIN, TRAIL_SURROGATE_MAX].
        template <typename u16>
        inline bool is_surrogate(u16 cp)
        {
            return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX);
        }

        /// True when `cp` does not exceed CODE_POINT_MAX and is not a surrogate.
        template <typename u32>
        inline bool is_code_point_valid(u32 cp)
        {
            return (cp <= CODE_POINT_MAX && !utf8::internal::is_surrogate(cp));
        }

        /// Derives the length of a UTF-8 sequence from its lead octet.
        ///
        /// `lead_it` is dereferenced unconditionally, so it must point at a
        /// readable octet.
        ///
        /// @return 1, 2, 3 or 4 for lead patterns 0xxxxxxx, 110xxxxx, 1110xxxx
        ///         and 11110xxx respectively; 0 when the octet matches none of
        ///         them (it cannot start a sequence).
        template <typename octet_iterator>
        inline typename std::iterator_traits<octet_iterator>::difference_type sequence_length(octet_iterator lead_it)
        {
            uint8_t lead = utf8::internal::mask8(*lead_it);
            if (lead < 0x80) {
                return 1;
            } else if ((lead >> 5) == 0x6) {
                return 2;
            } else if ((lead >> 4) == 0xe) {
                return 3;
            } else if ((lead >> 3) == 0x1e) {
                return 4;
            } else {
                return 0;
            }
        }

        /// Reports whether `cp` was encoded with a sequence length other than
        /// the one its magnitude requires.
        ///
        /// Only code points below 0x10000 are checked (they need exactly 1, 2
        /// or 3 octets); for larger values the function always returns false.
        template <typename octet_difference_type>
        inline bool is_overlong_sequence(uint32_t cp, octet_difference_type length)
        {
            if (cp < 0x80) {
                if (length != 1) {
                    return true;
                }
            } else if (cp < 0x800) {
                if (length != 2) {
                    return true;
                }
            } else if (cp < 0x10000) {
                if (length != 3) {
                    return true;
                }
            }

            return false;
        }

        /// Result codes produced by the decoding/validation helpers below.
        enum utf_error
        {
            UTF8_OK,
            NOT_ENOUGH_ROOM,
            INVALID_LEAD,
            INCOMPLETE_SEQUENCE,
            OVERLONG_SEQUENCE,
            INVALID_CODE_POINT
        };

        /// Advances `it` by one octet and checks that a trail octet is there.
        ///
        /// @return NOT_ENOUGH_ROOM if the increment reaches `end`,
        ///         INCOMPLETE_SEQUENCE if the new octet is not a trail octet,
        ///         UTF8_OK otherwise. `it` stays advanced in every case.
        template <typename octet_iterator>
        utf_error increase_safely(octet_iterator& it, octet_iterator end)
        {
            if (++it == end) {
                return NOT_ENOUGH_ROOM;
            }

            if (!utf8::internal::is_trail(*it)) {
                return INCOMPLETE_SEQUENCE;
            }

            return UTF8_OK;
        }

// Steps to the next trail octet; on failure returns the error from the
// enclosing function. Local to the get_sequence_N helpers and undefined below.
#define UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(IT, END) \
    {                                                  \
        utf_error ret = increase_safely(IT, END);      \
        if (ret != UTF8_OK)                            \
            return ret;                                \
    }

        // The get_sequence_N helpers decode an N-octet sequence starting at
        // `it` into `code_point`. On success `it` is left on the LAST octet of
        // the sequence (the caller steps past it). On failure `it` is left
        // wherever decoding stopped and `code_point` may hold a partial value.

        /// Decodes a one-octet sequence.
        template <typename octet_iterator>
        utf_error get_sequence_1(octet_iterator& it, octet_iterator end, uint32_t& code_point)
        {
            if (it == end) {
                return NOT_ENOUGH_ROOM;
            }

            code_point = utf8::internal::mask8(*it);

            return UTF8_OK;
        }

        /// Decodes a two-octet sequence (5 payload bits + 6 payload bits).
        template <typename octet_iterator>
        utf_error get_sequence_2(octet_iterator& it, octet_iterator end, uint32_t& code_point)
        {
            if (it == end) {
                return NOT_ENOUGH_ROOM;
            }

            code_point = utf8::internal::mask8(*it);

            UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)

            // The 0x7ff mask strips the lead-octet marker bits after the shift.
            code_point = ((code_point << 6) & 0x7ff) + ((*it) & 0x3f);

            return UTF8_OK;
        }

        /// Decodes a three-octet sequence (4 + 6 + 6 payload bits).
        template <typename octet_iterator>
        utf_error get_sequence_3(octet_iterator& it, octet_iterator end, uint32_t& code_point)
        {
            if (it == end) {
                return NOT_ENOUGH_ROOM;
            }

            code_point = utf8::internal::mask8(*it);

            UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)

            code_point = ((code_point << 12) & 0xffff) + ((utf8::internal::mask8(*it) << 6) & 0xfff);

            UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)

            code_point += (*it) & 0x3f;

            return UTF8_OK;
        }

        /// Decodes a four-octet sequence (3 + 6 + 6 + 6 payload bits).
        template <typename octet_iterator>
        utf_error get_sequence_4(octet_iterator& it, octet_iterator end, uint32_t& code_point)
        {
            if (it == end) {
                return NOT_ENOUGH_ROOM;
            }

            code_point = utf8::internal::mask8(*it);

            UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)

            code_point = ((code_point << 18) & 0x1fffff) + ((utf8::internal::mask8(*it) << 12) & 0x3ffff);

            UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)

            code_point += (utf8::internal::mask8(*it) << 6) & 0xfff;

            UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR(it, end)

            code_point += (*it) & 0x3f;

            return UTF8_OK;
        }

#undef UTF8_CPP_INCREASE_AND_RETURN_ON_ERROR

        /// Decodes and validates the UTF-8 sequence starting at `it`.
        ///
        /// @param it          In/out position. On UTF8_OK it is moved just past
        ///                    the decoded sequence; on any error it is restored
        ///                    to its value on entry.
        /// @param end         End of the octet range.
        /// @param code_point  Receives the decoded value; written only on UTF8_OK.
        /// @return UTF8_OK, or the utf_error describing why the sequence was
        ///         rejected. An empty range (`it == end`) yields NOT_ENOUGH_ROOM.
        template <typename octet_iterator>
        utf_error validate_next(octet_iterator& it, octet_iterator end, uint32_t& code_point)
        {
            // sequence_length() below dereferences `it`, so an empty range has
            // to be rejected before anything is read.
            if (it == end) {
                return NOT_ENOUGH_ROOM;
            }

            // Save the original value of it so we can go back in case of failure
            // Of course, it does not make much sense with i.e. stream iterators
            octet_iterator original_it = it;

            uint32_t cp = 0;
            // Determine the sequence length based on the lead octet
            using octet_difference_type = typename std::iterator_traits<octet_iterator>::difference_type;
            octet_difference_type const length = utf8::internal::sequence_length(it);

            // Get trail octets and calculate the code point
            utf_error err = UTF8_OK;
            switch (length) {
            case 0:
                return INVALID_LEAD;
            case 1:
                err = utf8::internal::get_sequence_1(it, end, cp);
                break;
            case 2:
                err = utf8::internal::get_sequence_2(it, end, cp);
                break;
            case 3:
                err = utf8::internal::get_sequence_3(it, end, cp);
                break;
            case 4:
                err = utf8::internal::get_sequence_4(it, end, cp);
                break;
            }

            if (err == UTF8_OK) {
                // Decoding succeeded. Now, security checks...
                if (utf8::internal::is_code_point_valid(cp)) {
                    if (!utf8::internal::is_overlong_sequence(cp, length)) {
                        // Passed! Return here.
                        code_point = cp;
                        // get_sequence_N leaves `it` on the last octet; step past it.
                        ++it;
                        return UTF8_OK;
                    }

                    err = OVERLONG_SEQUENCE;
                } else {
                    err = INVALID_CODE_POINT;
                }
            }

            // Failure branch - restore the original value of the iterator
            it = original_it;
            return err;
        }

        /// Overload of validate_next() for callers that only need the verdict,
        /// not the decoded code point.
        template <typename octet_iterator>
        inline utf_error validate_next(octet_iterator& it, octet_iterator end)
        {
            uint32_t ignored = 0;
            return utf8::internal::validate_next(it, end, ignored);
        }

    } // namespace internal

    // Byte order mark
    uint8_t const bom[] = {0xef, 0xbb, 0xbf};

    /// Scans [start, end) for the first sequence that fails validation.
    ///
    /// @return An iterator to the first octet of the first rejected sequence,
    ///         or `end` when the whole range validates.
    template <typename octet_iterator>
    octet_iterator find_invalid(octet_iterator start, octet_iterator end)
    {
        octet_iterator result = start;
        while (result != end) {
            utf8::internal::utf_error err_code = utf8::internal::validate_next(result, end);
            if (err_code != internal::UTF8_OK) {
                // validate_next restored `result` to the start of the bad sequence.
                return result;
            }
        }
        return result;
    }

    /// True when every sequence in [start, end) validates (an empty range is valid).
    template <typename octet_iterator>
    inline bool is_valid(octet_iterator start, octet_iterator end)
    {
        return (utf8::find_invalid(start, end) == end);
    }

    /// True when [it, end) begins with the three octets of `bom`.
    /// Each octet is read only after checking that `end` has not been reached.
    template <typename octet_iterator>
    inline bool starts_with_bom(octet_iterator it, octet_iterator end)
    {
        return (
            ((it != end) && (utf8::internal::mask8(*it++)) == bom[0]) &&
            ((it != end) && (utf8::internal::mask8(*it++)) == bom[1]) &&
            ((it != end) && (utf8::internal::mask8(*it)) == bom[2]));
    }

    // Deprecated in release 2.3
    /// True when the octets at `it` match `bom`.
    ///
    /// There is no end iterator, so nothing bounds the reads: up to three
    /// octets starting at `it` may be dereferenced and the caller must ensure
    /// they are readable. starts_with_bom() performs the same comparison with
    /// bounds checks.
    template <typename octet_iterator>
    inline bool is_bom(octet_iterator it)
    {
        return (
            (utf8::internal::mask8(*it++)) == bom[0] && (utf8::internal::mask8(*it++)) == bom[1] &&
            (utf8::internal::mask8(*it)) == bom[2]);
    }
} // namespace utf8
345
346#endif // UTF8_FOR_CPP_CORE_H_2675DCD0_9480_4c0c_B92A_CC14C027B731