ICU 66.1
66.1
common
unicode
utf16.h
Go to the documentation of this file.
1
// © 2016 and later: Unicode, Inc. and others.
2
// License & terms of use: http://www.unicode.org/copyright.html
3
/*
4
*******************************************************************************
5
*
6
* Copyright (C) 1999-2012, International Business Machines
7
* Corporation and others. All Rights Reserved.
8
*
9
*******************************************************************************
10
* file name: utf16.h
11
* encoding: UTF-8
12
* tab size: 8 (not used)
13
* indentation:4
14
*
15
* created on: 1999sep09
16
* created by: Markus W. Scherer
17
*/
18
34
#ifndef __UTF16_H__
35
#define __UTF16_H__
36
37
#include "unicode/umachine.h"
#ifndef __UTF_H__
#   include "unicode/utf.h"
#endif
41
42
/* single-code point definitions -------------------------------------------- */
43
50
/**
 * Does this code unit stand alone, i.e. is it not a surrogate?
 * Simply negates U_IS_SURROGATE(c).
 *
 * The replacement list is wrapped in parentheses so that the macro always
 * behaves as one operand, whatever expression it is expanded into.
 *
 * @param c code unit to test
 * @return nonzero if c is not a surrogate code unit, 0 otherwise
 */
#define U16_IS_SINGLE(c) (!U_IS_SURROGATE(c))
51
58
/**
 * Is this code unit a lead (first) surrogate, 0xd800..0xdbff?
 * Clears the low 10 bits of c and compares what is left against 0xd800.
 *
 * @param c code unit to test
 * @return nonzero if c is a lead surrogate, 0 otherwise
 */
#define U16_IS_LEAD(c) (((c) & 0xfffffc00) == 0xd800)
59
66
/**
 * Is this code unit a trail (second) surrogate, 0xdc00..0xdfff?
 * Clears the low 10 bits of c and compares what is left against 0xdc00.
 *
 * @param c code unit to test
 * @return nonzero if c is a trail surrogate, 0 otherwise
 */
#define U16_IS_TRAIL(c) (((c) & 0xfffffc00) == 0xdc00)
67
74
/**
 * Is this code unit a surrogate (lead or trail)?
 * Thin UTF-16 alias: forwards the argument unchanged to U_IS_SURROGATE().
 *
 * @param c code unit to test
 * @return whatever U_IS_SURROGATE(c) evaluates to
 */
#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
75
83
/**
 * Given a code unit that is already known to be a surrogate, is it a lead
 * surrogate?
 * Only bit 10 (0x400) is inspected, so the result is meaningless unless the
 * caller has checked U16_IS_SURROGATE(c) first.
 *
 * @param c surrogate code unit
 * @return nonzero if bit 10 of c is clear, 0 otherwise
 */
#define U16_IS_SURROGATE_LEAD(c) (((c) & 0x400) == 0)
84
92
/**
 * Given a code unit that is already known to be a surrogate, is it a trail
 * surrogate?
 * Only bit 10 (0x400) is inspected, so the result is meaningless unless the
 * caller has checked U16_IS_SURROGATE(c) first.
 *
 * @param c surrogate code unit
 * @return nonzero if bit 10 of c is set, 0 otherwise
 */
#define U16_IS_SURROGATE_TRAIL(c) (((c) & 0x400) != 0)
93
98
/**
 * Constant subtracted by U16_GET_SUPPLEMENTARY() after it has computed
 * (lead << 10) + trail.
 * It folds together the two surrogate base values (0xd800 shifted by 10, and
 * 0xdc00) and the 0x10000 start of the supplementary range.
 */
#define U16_SURROGATE_OFFSET ((0xd800 << 10UL) + 0xdc00 - 0x10000)
99
111
/**
 * Combine a lead and a trail surrogate into a supplementary code point.
 * Computes (lead << 10) + trail - U16_SURROGATE_OFFSET in UChar32 arithmetic.
 * No validation is done: the result is only meaningful when lead is a lead
 * surrogate and trail is a trail surrogate.
 *
 * @param lead  lead surrogate code unit
 * @param trail trail surrogate code unit
 * @return the combined code point as a UChar32
 */
#define U16_GET_SUPPLEMENTARY(lead, trail) \
    (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
113
114
122
/**
 * Lead surrogate code unit for a supplementary code point.
 * Computes (supplementary >> 10) + 0xd7c0 and casts to UChar; 0xd7c0 is
 * 0xd800 minus (0x10000 >> 10), so the 0x10000 bias is removed in one step.
 * The input is not range-checked.
 *
 * The whole expansion is parenthesized so that it is a single operand in any
 * surrounding expression.
 *
 * @param supplementary code point; meaningful for 0x10000..0x10ffff
 * @return lead surrogate as a UChar
 */
#define U16_LEAD(supplementary) ((UChar)(((supplementary)>>10)+0xd7c0))
123
131
/**
 * Trail surrogate code unit for a supplementary code point.
 * Takes the low 10 bits of the code point, ORs in 0xdc00 and casts to UChar.
 * The input is not range-checked.
 *
 * The whole expansion is parenthesized so that it is a single operand in any
 * surrounding expression.
 *
 * @param supplementary code point; meaningful for 0x10000..0x10ffff
 * @return trail surrogate as a UChar
 */
#define U16_TRAIL(supplementary) ((UChar)(((supplementary)&0x3ff)|0xdc00))
132
140
/**
 * Number of 16-bit code units needed to encode a code point.
 * The argument is compared as uint32_t, so anything above 0xffff (including
 * negative values after conversion) yields 2. No validity check is made.
 *
 * @param c code point
 * @return 1 if (uint32_t)c <= 0xffff, otherwise 2
 */
#define U16_LENGTH(c) (1 + ((uint32_t)(c) > 0xffff))
141
147
/**
 * Largest value U16_LENGTH() can return: a code point takes at most two
 * 16-bit code units (one surrogate pair).
 */
#define U16_MAX_LENGTH 2
148
166
/**
 * Random-access read of the code point that contains the code unit at
 * offset i. The offset may point at either half of a surrogate pair and is
 * not modified.
 *
 * "Unsafe": assumes well-formed UTF-16. For a surrogate at i the neighbouring
 * unit (i+1 for a lead, i-1 for a trail) is read without any bounds or
 * pairing check.
 *
 * s and i are evaluated more than once; c must be an lvalue.
 *
 * @param s array of 16-bit code units
 * @param i offset into s
 * @param c output: receives the code point
 */
#define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        if(U16_IS_SURROGATE_LEAD(c)) { \
            (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
        } else { \
            (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
176
200
/**
 * Random-access read of the code point that contains the code unit at
 * offset i. The offset may point at either half of a surrogate pair and is
 * not modified.
 *
 * "Safe": a lead surrogate is combined with s[i+1] only if i+1 != length and
 * that unit is a trail surrogate; a trail surrogate is combined with s[i-1]
 * only if i > start and that unit is a lead surrogate. Otherwise c is left as
 * the unpaired surrogate code unit itself.
 *
 * s, i, start and length may be evaluated more than once; c must be an lvalue.
 *
 * @param s      array of 16-bit code units
 * @param start  lowest offset that may be read
 * @param i      offset into s, start <= i < length
 * @param length limit offset (one past the last readable unit)
 * @param c      output: receives the code point or the unpaired surrogate
 */
#define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(U16_IS_SURROGATE_LEAD(c)) { \
            if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
                (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
            } \
        } else { \
            if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
                (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
215
239
/**
 * Random-access read of the code point that contains the code unit at
 * offset i, substituting 0xfffd for unpaired surrogates. The offset may point
 * at either half of a surrogate pair and is not modified.
 *
 * Same pairing rules as U16_GET(): a lead needs a trail at i+1 (with
 * i+1 != length), a trail needs a lead at i-1 (with i > start). When the
 * partner is missing, c is set to 0xfffd instead of the surrogate value.
 *
 * s, i, start and length may be evaluated more than once; c must be an lvalue.
 *
 * @param s      array of 16-bit code units
 * @param start  lowest offset that may be read
 * @param i      offset into s, start <= i < length
 * @param length limit offset (one past the last readable unit)
 * @param c      output: receives the code point, or 0xfffd
 */
#define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[i]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(U16_IS_SURROGATE_LEAD(c)) { \
            if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
                (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
            } else { \
                (c)=0xfffd; \
            } \
        } else { \
            if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
                (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
            } else { \
                (c)=0xfffd; \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
258
259
/* definitions with forward iteration --------------------------------------- */
260
280
/**
 * Read the code point starting at offset i and advance i past it
 * (post-increment forward iteration).
 *
 * "Unsafe": assumes well-formed UTF-16. After a lead surrogate the next unit
 * is consumed as its trail without any bounds or pairing check. i must point
 * at the start of a code point; a trail surrogate at i is returned as is.
 *
 * s and i are evaluated more than once; i and c must be lvalues.
 *
 * @param s array of 16-bit code units
 * @param i in/out offset; advanced by 1 or 2
 * @param c output: receives the code point
 */
#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(U16_IS_LEAD(c)) { \
        (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
    } \
} UPRV_BLOCK_MACRO_END
286
308
/**
 * Read the code point starting at offset i and advance i past it
 * (post-increment forward iteration).
 *
 * "Safe": a lead surrogate is combined with the following unit only if
 * i != length after the first increment and that unit is a trail surrogate;
 * then i is advanced a second time. Otherwise c is the single code unit read
 * (possibly an unpaired surrogate) and i has advanced by one.
 *
 * The caller must ensure i < length on entry (or, if length is negative, that
 * the text is otherwise delimited): the first unit is read unconditionally.
 * s, i and length are evaluated more than once; i and c must be lvalues.
 *
 * @param s      array of 16-bit code units
 * @param i      in/out offset; advanced by 1 or 2
 * @param length limit offset (one past the last readable unit)
 * @param c      output: receives the code point or an unpaired surrogate
 */
#define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(U16_IS_LEAD(c)) { \
        uint16_t __c2; \
        if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
            ++(i); \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
318
340
/**
 * Read the code point starting at offset i and advance i past it,
 * substituting 0xfffd for any unpaired surrogate.
 *
 * A surrogate is decoded only when it is a lead, i != length after the first
 * increment, and the following unit is a trail; then i advances a second
 * time. Every other surrogate (lone lead, or any trail at i) yields 0xfffd
 * with i advanced by one.
 *
 * The first unit is read unconditionally, so the caller must ensure it is
 * readable. s, i and length are evaluated more than once; i and c must be
 * lvalues.
 *
 * @param s      array of 16-bit code units
 * @param i      in/out offset; advanced by 1 or 2
 * @param length limit offset (one past the last readable unit)
 * @param c      output: receives the code point, or 0xfffd
 */
#define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[(i)++]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
            ++(i); \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } else { \
            (c)=0xfffd; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
352
366
/**
 * Write a code point at offset i as one or two 16-bit code units and advance
 * i past what was written.
 *
 * "Unsafe": neither the buffer capacity nor the validity of c is checked.
 * Any value whose uint32_t conversion exceeds 0xffff is written as a
 * surrogate pair using the same arithmetic as U16_LEAD()/U16_TRAIL().
 *
 * s, i and c are evaluated more than once; i must be an lvalue.
 *
 * @param s destination array of 16-bit code units
 * @param i in/out offset; advanced by 1 or 2
 * @param c code point to write
 */
#define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } \
} UPRV_BLOCK_MACRO_END
374
392
/**
 * Write a code point at offset i as one or two 16-bit code units and advance
 * i past what was written, reporting failure through isError.
 *
 * - (uint32_t)c <= 0xffff: one unit is written. Note that this path does not
 *   consult capacity; the caller must guarantee i < capacity.
 * - c <= 0x10ffff and i+1 < capacity: a surrogate pair is written.
 * - otherwise (c out of range, or no room for the pair): nothing is written,
 *   i is unchanged and isError is set to TRUE.
 *
 * isError is only ever set, never cleared, so the caller must initialize it
 * before the first call. s, i and c are evaluated more than once; i and
 * isError must be lvalues.
 *
 * @param s        destination array of 16-bit code units
 * @param i        in/out offset; advanced by 0, 1 or 2
 * @param capacity size of s in code units
 * @param c        code point to write
 * @param isError  output flag; set to TRUE on failure, otherwise untouched
 */
#define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
    if((uint32_t)(c)<=0xffff) { \
        (s)[(i)++]=(uint16_t)(c); \
    } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
        (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
        (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    } else /* c>0x10ffff or not enough space */ { \
        (isError)=TRUE; \
    } \
} UPRV_BLOCK_MACRO_END
402
413
/**
 * Advance offset i past one code point.
 *
 * "Unsafe": assumes well-formed UTF-16. If the unit at i is a lead surrogate,
 * i is advanced by two without looking at the following unit or at any limit.
 *
 * s and i are evaluated more than once; i must be an lvalue.
 *
 * @param s array of 16-bit code units
 * @param i in/out offset; advanced by 1 or 2
 */
#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((s)[(i)++])) { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
418
432
/**
 * Advance offset i past one code point.
 *
 * "Safe": i always advances by one; it advances a second time only if the
 * unit just passed was a lead surrogate, the new i != length, and the unit
 * at the new i is a trail surrogate. An unpaired surrogate therefore counts
 * as one code point.
 *
 * The first unit is read unconditionally, so the caller must ensure it is
 * readable. s, i and length are evaluated more than once; i must be an
 * lvalue.
 *
 * @param s      array of 16-bit code units
 * @param i      in/out offset; advanced by 1 or 2
 * @param length limit offset (one past the last readable unit)
 */
#define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
437
450
/**
 * Advance offset i past n code points by applying U16_FWD_1_UNSAFE() n times.
 *
 * "Unsafe": assumes well-formed UTF-16 and that n code points are available;
 * no limit is checked. n is evaluated once into a local int32_t counter;
 * n <= 0 leaves i unchanged.
 *
 * s and i are evaluated more than once; i must be an lvalue.
 *
 * @param s array of 16-bit code units
 * @param i in/out offset
 * @param n number of code points to skip
 */
#define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    while(__N>0) { \
        U16_FWD_1_UNSAFE(s, i); \
        --__N; \
    } \
} UPRV_BLOCK_MACRO_END
457
473
/**
 * Advance offset i past up to n code points using U16_FWD_1().
 *
 * "Safe": the loop stops early when the text ends. With length >= 0 the end
 * is i >= length; with length < 0 the text is treated as NUL-terminated and
 * the loop stops when s[i] == 0. n is evaluated once into a local int32_t
 * counter; n <= 0 leaves i unchanged.
 *
 * s, i and length are evaluated more than once; i must be an lvalue.
 *
 * @param s      array of 16-bit code units
 * @param i      in/out offset
 * @param length limit offset, or negative for NUL-terminated text
 * @param n      maximum number of code points to skip
 */
#define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
        U16_FWD_1(s, i, length); \
        --__N; \
    } \
} UPRV_BLOCK_MACRO_END
480
494
/**
 * Adjust a random-access offset to the start of the code point it is in:
 * if the unit at i is a trail surrogate, i is decremented by one.
 *
 * "Unsafe": assumes well-formed UTF-16; the preceding unit is not examined
 * and no lower bound is checked.
 *
 * s and i may be evaluated more than once; i must be an lvalue.
 *
 * @param s array of 16-bit code units
 * @param i in/out offset
 */
#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[i])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END
499
514
/**
 * Adjust a random-access offset to the start of the code point it is in.
 *
 * "Safe": i is decremented only if the unit at i is a trail surrogate,
 * i > start, and the unit at i-1 is a lead surrogate. An unpaired trail
 * surrogate leaves i unchanged.
 *
 * s, start and i may be evaluated more than once; i must be an lvalue.
 *
 * @param s     array of 16-bit code units
 * @param start lowest offset that may be read
 * @param i     in/out offset, start <= i
 */
#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END
519
520
/* definitions with backward iteration -------------------------------------- */
521
542
/**
 * Move offset i back over one code point and read it
 * (pre-decrement backward iteration).
 *
 * "Unsafe": assumes well-formed UTF-16. If the unit before i is a trail
 * surrogate, the unit before that is consumed as its lead without any bounds
 * or pairing check.
 *
 * s and i are evaluated more than once; i and c must be lvalues.
 *
 * @param s array of 16-bit code units
 * @param i in/out offset; decremented by 1 or 2
 * @param c output: receives the code point
 */
#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_TRAIL(c)) { \
        (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
    } \
} UPRV_BLOCK_MACRO_END
548
569
/**
 * Move offset i back over one code point and read it
 * (pre-decrement backward iteration).
 *
 * "Safe": a trail surrogate is combined with the preceding unit only if
 * i > start after the first decrement and that unit is a lead surrogate;
 * then i is decremented a second time. Otherwise c is the single code unit
 * read (possibly an unpaired surrogate) and i has moved back by one.
 *
 * The caller must ensure start < i on entry. s, start and i are evaluated
 * more than once; i and c must be lvalues.
 *
 * @param s     array of 16-bit code units
 * @param start lowest offset that may be read
 * @param i     in/out offset; decremented by 1 or 2
 * @param c     output: receives the code point or an unpaired surrogate
 */
#define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_TRAIL(c)) { \
        uint16_t __c2; \
        if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
            --(i); \
            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
579
600
/**
 * Move offset i back over one code point and read it, substituting 0xfffd
 * for any unpaired surrogate.
 *
 * A surrogate is decoded only when it is a trail, i > start after the first
 * decrement, and the preceding unit is a lead; then i is decremented a second
 * time. Every other surrogate (lone trail, or any lead before i) yields
 * 0xfffd with i moved back by one.
 *
 * The caller must ensure start < i on entry. s, start and i are evaluated
 * more than once; i and c must be lvalues.
 *
 * @param s     array of 16-bit code units
 * @param start lowest offset that may be read
 * @param i     in/out offset; decremented by 1 or 2
 * @param c     output: receives the code point, or 0xfffd
 */
#define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=(s)[--(i)]; \
    if(U16_IS_SURROGATE(c)) { \
        uint16_t __c2; \
        if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
            --(i); \
            (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
        } else { \
            (c)=0xfffd; \
        } \
    } \
} UPRV_BLOCK_MACRO_END
612
624
/**
 * Move offset i back over one code point.
 *
 * "Unsafe": assumes well-formed UTF-16. If the unit before i is a trail
 * surrogate, i is decremented by two without looking at the preceding unit
 * or at any lower bound.
 *
 * s and i are evaluated more than once; i must be an lvalue.
 *
 * @param s array of 16-bit code units
 * @param i in/out offset; decremented by 1 or 2
 */
#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[--(i)])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END
629
642
/**
 * Move offset i back over one code point.
 *
 * "Safe": i is always decremented once; it is decremented a second time only
 * if the unit just passed was a trail surrogate, the new i > start, and the
 * unit at i-1 is a lead surrogate. An unpaired surrogate therefore counts as
 * one code point.
 *
 * The caller must ensure start < i on entry. s, start and i are evaluated
 * more than once; i must be an lvalue.
 *
 * @param s     array of 16-bit code units
 * @param start lowest offset that may be read
 * @param i     in/out offset; decremented by 1 or 2
 */
#define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
        --(i); \
    } \
} UPRV_BLOCK_MACRO_END
647
661
/**
 * Move offset i back over n code points by applying U16_BACK_1_UNSAFE()
 * n times.
 *
 * "Unsafe": assumes well-formed UTF-16 and that n code points precede i;
 * no lower bound is checked. n is evaluated once into a local int32_t
 * counter; n <= 0 leaves i unchanged.
 *
 * s and i are evaluated more than once; i must be an lvalue.
 *
 * @param s array of 16-bit code units
 * @param i in/out offset
 * @param n number of code points to move back
 */
#define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    while(__N>0) { \
        U16_BACK_1_UNSAFE(s, i); \
        --__N; \
    } \
} UPRV_BLOCK_MACRO_END
668
683
/**
 * Move offset i back over up to n code points using U16_BACK_1().
 *
 * "Safe": the loop stops early once i reaches start. n is evaluated once
 * into a local int32_t counter; n <= 0 leaves i unchanged.
 *
 * s, start and i are evaluated more than once; i must be an lvalue.
 *
 * @param s     array of 16-bit code units
 * @param start lowest offset that may be read
 * @param i     in/out offset, start <= i
 * @param n     maximum number of code points to move back
 */
#define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t __N=(n); \
    while(__N>0 && (i)>(start)) { \
        U16_BACK_1(s, start, i); \
        --__N; \
    } \
} UPRV_BLOCK_MACRO_END
690
704
/**
 * Adjust a random-access limit offset so that it lies after a whole code
 * point: if the unit at i-1 is a lead surrogate, i is incremented by one so
 * that it no longer splits the pair.
 *
 * "Unsafe": assumes well-formed UTF-16; the unit at i is not examined and no
 * bounds are checked (s[i-1] is read unconditionally).
 *
 * s and i may be evaluated more than once; i must be an lvalue.
 *
 * @param s array of 16-bit code units
 * @param i in/out limit offset
 */
#define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    if(U16_IS_LEAD((s)[(i)-1])) { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
709
727
/**
 * Adjust a random-access limit offset so that it lies after a whole code
 * point.
 *
 * "Safe": i is incremented only if all of the following hold: start < i;
 * i < length, or length is negative (in which case no upper limit is
 * applied); the unit at i-1 is a lead surrogate; and the unit at i is a
 * trail surrogate. Otherwise i is left unchanged.
 *
 * s, start, i and length may be evaluated more than once; i must be an
 * lvalue.
 *
 * @param s      array of 16-bit code units
 * @param start  lowest offset that may be read
 * @param i      in/out limit offset
 * @param length limit offset of the text, or negative for no upper limit
 */
#define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
        ++(i); \
    } \
} UPRV_BLOCK_MACRO_END
732
733
#endif
utf.h
C API: Code point macros.
umachine.h
Basic types and constants for UTF.
Generated by
1.8.17