/* zueci.h - UTF-8 to/from Extended Channel Interpretations */
/*
    libzueci - an open source UTF-8 ECI library adapted from libzint
    Copyright (C) 2022 gitlost
 */
/* SPDX-License-Identifier: BSD-3-Clause */

#ifndef ZUECI_H
#define ZUECI_H

/* Version: 1.0.1 */

/* Warning and error returns from API functions below */
#define ZUECI_WARN_INVALID_DATA     1   /* Invalid data but replacement character used */
#define ZUECI_ERROR                 5   /* Warn/error marker, not returned */
#define ZUECI_ERROR_INVALID_DATA    6   /* Source data invalid or unmappable */
#define ZUECI_ERROR_INVALID_ECI     7   /* ECI not a valid Character Set ECI */
#define ZUECI_ERROR_INVALID_ARGS    8   /* One or more arguments invalid (e.g. NULL) */
#define ZUECI_ERROR_INVALID_UTF8    9   /* Source data not valid UTF-8 */

#ifdef _WIN32
#  if defined(DLL_EXPORT) || defined(PIC) || defined(_USRDLL)
#    define ZUECI_EXTERN __declspec(dllexport)
#  elif defined(ZUECI_DLL)
#    define ZUECI_EXTERN __declspec(dllimport)
#  else
#    define ZUECI_EXTERN extern
#  endif
#else
#  define ZUECI_EXTERN extern
#endif

#ifdef __cplusplus
extern "C" {
#endif

/*
    ECI arg `eci` must be a valid Interpretative Character Set ECI, i.e. 0-13, 15-18, 20-35, 170 or 899,
    as defined by AIM ITS/04-023 International Technical Standard - Extended Channel Interpretations
    Part 3: Register (Version 2, February 2022):

        0    IBM CP437 (top)
        1    ISO/IEC 8859-1 - Latin alphabet No. 1 (top)
        2    IBM CP437 (top)
        3    ISO/IEC 8859-1 - Latin alphabet No. 1 (top)
        4    ISO/IEC 8859-2 - Latin alphabet No. 2 (top)
        5    ISO/IEC 8859-3 - Latin alphabet No. 3 (top)
        6    ISO/IEC 8859-4 - Latin alphabet No. 4 (top)
        7    ISO/IEC 8859-5 - Latin/Cyrillic alphabet (top)
        8    ISO/IEC 8859-6 - Latin/Arabic alphabet (top)
        9    ISO/IEC 8859-7 - Latin/Greek alphabet (top)
        10   ISO/IEC 8859-8 - Latin/Hebrew alphabet (top)
        11   ISO/IEC 8859-9 - Latin alphabet No. 5 (Turkish) (top)
        12   ISO/IEC 8859-10 - Latin alphabet No. 6 (Nordic) (top)
        13   ISO/IEC 8859-11 - Latin/Thai alphabet (top)
        15   ISO/IEC 8859-13 - Latin alphabet No. 7 (Baltic) (top)
        16   ISO/IEC 8859-14 - Latin alphabet No. 8 (Celtic) (top)
        17   ISO/IEC 8859-15 - Latin alphabet No. 9 (top)
        18   ISO/IEC 8859-16 - Latin alphabet No. 10 (top)
        20   Shift JIS (JIS X 0208 and JIS X 0201) Japanese
        21   Windows 1250 - Latin 2 (Central Europe)
        22   Windows 1251 - Cyrillic
        23   Windows 1252 - Latin 1
        24   Windows 1256 - Arabic
        25   UTF-16BE (big-endian)
        26   UTF-8
        27   ASCII (ISO/IEC 646 IRV)
        28   Big5 (Taiwan) Chinese
        29   GB 2312 (PRC) Chinese
        30   EUC-KR (KS X 1001:2002) Korean
        31   GBK Chinese
        32   GB 18030 Chinese
        33   UTF-16LE (little-endian)
        34   UTF-32BE (big-endian)
        35   UTF-32LE (little-endian)
        170  ISO/IEC 646 Invariant
        899  8-bit binary data

    "(top)" means encoding applies to codepoints 0x80..FF (or 0xA0..FF for ISO/IEC 8859) with 0x00..7F as ASCII

    ECIs 0, 1 and 2 are obsolete, however ECI 2 is still referenced by ISO/IEC 15438:2015 (PDF417) Annex H.2.3

    All except ECI 20 (Shift JIS) and ECI 170 (ISO/IEC 646 Invariant) map ASCII one-to-one (but see
    `ZUECI_FLAG_XXX` flags below).
    ECI 20 re-maps 2 characters (backslash and tilde), and ECI 170 has no mapping for 12 characters (#$@[\]^`{|}~).

    All mappings are the same as libiconv with the following exception for ECI 20 (Shift JIS):
                    Unicode     Shift JIS   Unicode
        libzueci    U+005C  ->  0x815F  ->  U+005C  (U+005C REVERSE SOLIDUS)
                    U+FF3C  ->  no mapping          (U+FF3C FULLWIDTH REVERSE SOLIDUS)

        libiconv    U+005C  ->  no mapping
                    U+FF3C  ->  0x815F  ->  U+FF3C
    The rationale for this difference is that libzueci is following the "official" source
        https://unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/SHIFTJIS.TXT
    (2015-12-02) which gives those mappings. (Note "official" is used loosely, there's no such thing unfortunately.)
    Could not find a reason for libiconv doing it its way from reading the source.

    All other mappings are the same; in particular:
                    Unicode     Shift JIS   Unicode
                    U+007E  ->  no mapping          (U+007E TILDE)
                    U+203E  ->  0x7E    ->  U+202E  (U+203E OVERLINE)
                    U+00A5  ->  0x5C    ->  U+00A5  (U+00A5 YEN SIGN)
 */

/*
    If embedding the library (i.e. including the 10 files directly) and only want ECI-to-UTF-8 functionality,
    define `ZUECI_EMBED_NO_TO_ECI`
*/
#ifndef ZUECI_EMBED_NO_TO_ECI

/*
    Convert UTF-8 `src` of length `src_len` to `eci`-encoded `dest`.
    `p_dest_len` is set to length of `dest` on output.
    `dest` must be big enough (4-times the `src_len`, or see `zueci_dest_len_eci()`). It is not NUL-terminated.
    Returns 0 if successful, one of `ZUECI_ERROR_XXX` if not.
 */
ZUECI_EXTERN int zueci_utf8_to_eci(const int eci, const unsigned char src[], const int src_len,
                    unsigned char dest[], int *p_dest_len);

/*
    Calculate sufficient (i.e. approx.) length needed to convert UTF-8 `src` of length `len` from UTF-8 to ECI
    `eci`, and place in `p_dest_len`.
    Returns 0 if successful, one of `ZUECI_ERROR_XXX` if not.
 */
ZUECI_EXTERN int zueci_dest_len_eci(const int eci, const unsigned char src[], const int src_len, int *p_dest_len);

#endif /* ZUECI_EMBED_NO_TO_ECI */

/*
    These flags can be OR-ed together to change the behaviour of `zueci_eci_to_utf8()` and `zueci_dest_len_utf8()`
 */

/*
    For single-byte ECIs copy the source straight-thru rather than erroring or replacing if undefined. Affects
    ISO/IEC 8859 (ECIs 1, 3-13, 15-18), Windows 125x (ECIs 21-24), ASCII (ECI 27) & ISO/IEC 646 Invariant (ECI 170).
 */
#define ZUECI_FLAG_SB_STRAIGHT_THRU     1

/*
    For ECI 20 Shift JIS, copy backslash & tilde straight-thru rather than mapping to Yen sign & overline resp.
 */
#define ZUECI_FLAG_SJIS_STRAIGHT_THRU   2

/*
    If embedding the library (i.e. including the 10 files directly) and only want UTF-8-to-ECI functionality,
    define `ZUECI_EMBED_NO_TO_UTF8`
*/
#ifndef ZUECI_EMBED_NO_TO_UTF8

/*
    Convert ECI-encoded `src` of length `src_len` to UTF-8 `dest`.
    `p_dest_len` is set to length of `dest` on output.
    `dest` must be big enough (4-times the `src_len`, or see `zueci_dest_len_utf8()`). It is not NUL-terminated.
    If the Unicode BMP `replacement_char` (<= 0xFFFF) is non-zero then it will substituted for all source characters
    with no mapping and processing will continue, returning ZUECI_WARN_INVALID_DATA unless other errors.
    `flags` can be set with `ZUECI_FLAG_XXX` to change behaviour.
    Returns 0 if successful, one of `ZUECI_ERROR_XXX` if not.
 */
ZUECI_EXTERN int zueci_eci_to_utf8(const int eci, const unsigned char src[], const int src_len,
                    const unsigned int replacement_char, const unsigned int flags, unsigned char dest[],
                    int *p_dest_len);

/*
    Calculate exact length needed to convert ECI-encoded `src` of length `len` from ECI `eci`, and place in
    `p_dest_len`.
    Returns 0 if successful, one of `ZUECI_ERROR_XXX` if not.
 */
ZUECI_EXTERN int zueci_dest_len_utf8(const int eci, const unsigned char src[], const int src_len,
                    const unsigned int replacement_char, const unsigned int flags, int *p_dest_len);

#endif /* ZUECI_EMBED_NO_TO_UTF8 */

#ifdef __cplusplus
}
#endif

/* vim: set ts=4 sw=4 et : */
#endif /* ZUECI_H */