Files
ANSLibs/QRCode/libzueci/zueci.h

180 lines
7.6 KiB
C

/* zueci.h - UTF-8 to/from Extended Channel Interpretations */
/*
libzueci - an open source UTF-8 ECI library adapted from libzint
Copyright (C) 2022 gitlost
*/
/* SPDX-License-Identifier: BSD-3-Clause */
#ifndef ZUECI_H
#define ZUECI_H
/* Version: 1.0.1 */
/*
Warning and error returns from API functions below (0 is returned on success).
NOTE(review): presumably return values below `ZUECI_ERROR` are warnings and those above it are errors - confirm
against the implementation.
*/
#define ZUECI_WARN_INVALID_DATA 1 /* Invalid data but replacement character used */
#define ZUECI_ERROR 5 /* Warn/error marker, not returned */
#define ZUECI_ERROR_INVALID_DATA 6 /* Source data invalid or unmappable */
#define ZUECI_ERROR_INVALID_ECI 7 /* ECI not a valid Character Set ECI */
#define ZUECI_ERROR_INVALID_ARGS 8 /* One or more arguments invalid (e.g. NULL) */
#define ZUECI_ERROR_INVALID_UTF8 9 /* Source data not valid UTF-8 */
/*
`ZUECI_EXTERN` is the storage-class/linkage prefix placed on every public API declaration below.
On Windows (`_WIN32`) it expands to `__declspec(dllexport)` when any of `DLL_EXPORT`, `PIC` or `_USRDLL` is defined
(presumably set when building the library itself as a DLL), else to `__declspec(dllimport)` when `ZUECI_DLL` is
defined (presumably set by code consuming the DLL). In every other case it is plain `extern`.
*/
#if defined(_WIN32) && (defined(DLL_EXPORT) || defined(PIC) || defined(_USRDLL))
#define ZUECI_EXTERN __declspec(dllexport)
#elif defined(_WIN32) && defined(ZUECI_DLL)
#define ZUECI_EXTERN __declspec(dllimport)
#else
#define ZUECI_EXTERN extern
#endif
#ifdef __cplusplus
extern "C" {
#endif
/*
ECI arg `eci` must be a valid Interpretative Character Set ECI, i.e. 0-13, 15-18, 20-35, 170 or 899,
as defined by AIM ITS/04-023 International Technical Standard - Extended Channel Interpretations
Part 3: Register (Version 2, February 2022):
0 IBM CP437 (top)
1 ISO/IEC 8859-1 - Latin alphabet No. 1 (top)
2 IBM CP437 (top)
3 ISO/IEC 8859-1 - Latin alphabet No. 1 (top)
4 ISO/IEC 8859-2 - Latin alphabet No. 2 (top)
5 ISO/IEC 8859-3 - Latin alphabet No. 3 (top)
6 ISO/IEC 8859-4 - Latin alphabet No. 4 (top)
7 ISO/IEC 8859-5 - Latin/Cyrillic alphabet (top)
8 ISO/IEC 8859-6 - Latin/Arabic alphabet (top)
9 ISO/IEC 8859-7 - Latin/Greek alphabet (top)
10 ISO/IEC 8859-8 - Latin/Hebrew alphabet (top)
11 ISO/IEC 8859-9 - Latin alphabet No. 5 (Turkish) (top)
12 ISO/IEC 8859-10 - Latin alphabet No. 6 (Nordic) (top)
13 ISO/IEC 8859-11 - Latin/Thai alphabet (top)
15 ISO/IEC 8859-13 - Latin alphabet No. 7 (Baltic) (top)
16 ISO/IEC 8859-14 - Latin alphabet No. 8 (Celtic) (top)
17 ISO/IEC 8859-15 - Latin alphabet No. 9 (top)
18 ISO/IEC 8859-16 - Latin alphabet No. 10 (top)
20 Shift JIS (JIS X 0208 and JIS X 0201) Japanese
21 Windows 1250 - Latin 2 (Central Europe)
22 Windows 1251 - Cyrillic
23 Windows 1252 - Latin 1
24 Windows 1256 - Arabic
25 UTF-16BE (big-endian)
26 UTF-8
27 ASCII (ISO/IEC 646 IRV)
28 Big5 (Taiwan) Chinese
29 GB 2312 (PRC) Chinese
30 EUC-KR (KS X 1001:2002) Korean
31 GBK Chinese
32 GB 18030 Chinese
33 UTF-16LE (little-endian)
34 UTF-32BE (big-endian)
35 UTF-32LE (little-endian)
170 ISO/IEC 646 Invariant
899 8-bit binary data
"(top)" means encoding applies to codepoints 0x80..FF (or 0xA0..FF for ISO/IEC 8859) with 0x00..7F as ASCII
ECIs 0, 1 and 2 are obsolete, however ECI 2 is still referenced by ISO/IEC 15438:2015 (PDF417) Annex H.2.3
All except ECI 20 (Shift JIS) and ECI 170 (ISO/IEC 646 Invariant) map ASCII one-to-one (but see
`ZUECI_FLAG_XXX` flags below).
ECI 20 re-maps 2 characters (backslash and tilde), and ECI 170 has no mapping for 12 characters (#$@[\]^`{|}~).
All mappings are the same as libiconv with the following exception for ECI 20 (Shift JIS):
Unicode Shift JIS Unicode
libzueci U+005C -> 0x815F -> U+005C (U+005C REVERSE SOLIDUS)
U+FF3C -> no mapping (U+FF3C FULLWIDTH REVERSE SOLIDUS)
libiconv U+005C -> no mapping
U+FF3C -> 0x815F -> U+FF3C
The rationale for this difference is that libzueci is following the "official" source
https://unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/SHIFTJIS.TXT
(2015-12-02) which gives those mappings. (Note "official" is used loosely, there's no such thing unfortunately.)
Could not find a reason for libiconv doing it its way from reading the source.
All other mappings are the same; in particular:
Unicode Shift JIS Unicode
U+007E -> no mapping (U+007E TILDE)
U+203E -> 0x7E -> U+203E (U+203E OVERLINE)
U+00A5 -> 0x5C -> U+00A5 (U+00A5 YEN SIGN)
*/
/*
If embedding the library (i.e. including the 10 files directly) and only want ECI-to-UTF-8 functionality,
define `ZUECI_EMBED_NO_TO_ECI`
*/
#ifndef ZUECI_EMBED_NO_TO_ECI
/*
Convert UTF-8 `src` of length `src_len` to `eci`-encoded `dest`.
`eci` must be one of the Character Set ECIs listed above.
`p_dest_len` is set to the length of the data placed in `dest` on output.
`dest` must be big enough (4-times the `src_len`, or see `zueci_dest_len_eci()`). It is not NUL-terminated.
Returns 0 if successful, one of `ZUECI_ERROR_XXX` if not.
*/
ZUECI_EXTERN int zueci_utf8_to_eci(const int eci, const unsigned char src[], const int src_len,
unsigned char dest[], int *p_dest_len);
/*
Calculate a sufficient (i.e. approximate, not necessarily exact) length for the `dest` buffer needed to convert
UTF-8 `src` of length `src_len` to ECI `eci`, and place it in `p_dest_len`.
Returns 0 if successful, one of `ZUECI_ERROR_XXX` if not.
*/
ZUECI_EXTERN int zueci_dest_len_eci(const int eci, const unsigned char src[], const int src_len, int *p_dest_len);
#endif /* ZUECI_EMBED_NO_TO_ECI */
/*
These flags can be OR-ed together and passed as the `flags` argument to change the behaviour of
`zueci_eci_to_utf8()` and `zueci_dest_len_utf8()`
*/
/*
For single-byte ECIs, copy the source straight through rather than erroring or replacing if undefined. Affects
ISO/IEC 8859 (ECIs 1, 3-13, 15-18), Windows 125x (ECIs 21-24), ASCII (ECI 27) & ISO/IEC 646 Invariant (ECI 170).
*/
#define ZUECI_FLAG_SB_STRAIGHT_THRU 1
/*
For ECI 20 Shift JIS, copy backslash (0x5C) & tilde (0x7E) straight through rather than mapping them to Yen sign
(U+00A5) & overline (U+203E) respectively.
*/
#define ZUECI_FLAG_SJIS_STRAIGHT_THRU 2
/*
If embedding the library (i.e. including the 10 files directly) and only want UTF-8-to-ECI functionality,
define `ZUECI_EMBED_NO_TO_UTF8`
*/
#ifndef ZUECI_EMBED_NO_TO_UTF8
/*
Convert ECI-encoded `src` of length `src_len` to UTF-8 `dest`.
`eci` must be one of the Character Set ECIs listed above.
`p_dest_len` is set to the length of the data placed in `dest` on output.
`dest` must be big enough (4-times the `src_len`, or see `zueci_dest_len_utf8()`). It is not NUL-terminated.
If the Unicode BMP `replacement_char` (<= 0xFFFF) is non-zero then it will be substituted for all source characters
with no mapping and processing will continue, in which case `ZUECI_WARN_INVALID_DATA` is returned unless some other
error occurs.
`flags` can be set with `ZUECI_FLAG_XXX` to change behaviour.
Returns 0 if successful, `ZUECI_WARN_INVALID_DATA` if replacements were made (see above), or one of
`ZUECI_ERROR_XXX` if not.
*/
ZUECI_EXTERN int zueci_eci_to_utf8(const int eci, const unsigned char src[], const int src_len,
const unsigned int replacement_char, const unsigned int flags, unsigned char dest[],
int *p_dest_len);
/*
Calculate exact length needed to convert ECI-encoded `src` of length `src_len` from ECI `eci` to UTF-8, and place
it in `p_dest_len`.
`replacement_char` and `flags` presumably have the same meaning as for `zueci_eci_to_utf8()` and should be given
the same values that will be used for the conversion - TODO confirm against the implementation.
Returns 0 if successful, one of `ZUECI_ERROR_XXX` if not.
*/
ZUECI_EXTERN int zueci_dest_len_utf8(const int eci, const unsigned char src[], const int src_len,
const unsigned int replacement_char, const unsigned int flags, int *p_dest_len);
#endif /* ZUECI_EMBED_NO_TO_UTF8 */
#ifdef __cplusplus
}
#endif
/* vim: set ts=4 sw=4 et : */
#endif /* ZUECI_H */