ANSLibs/QRCode/pdf417/PDFDecoder.cpp

/*
* Copyright 2016 Nu-book Inc.
* Copyright 2016 ZXing authors
*/
// SPDX-License-Identifier: Apache-2.0

#include "PDFDecoder.h"

#include "CharacterSet.h"
#include "DecoderResult.h"
#include "PDFDecoderResultExtra.h"
#include "ZXAlgorithms.h"
#include "ZXBigInteger.h"
#include "ZXTestSupport.h"

#include <array>
#include <cassert>
#include <sstream>
#include <utility>

namespace ZXing::Pdf417 {

// Text Compaction sub-modes tracked by DecodeTextCompaction while it walks the 0-29 sub-mode values.
enum class Mode
{
	// Uppercase alphabetic; the sub-mode DecodeTextCompaction starts in
	ALPHA,
	// Lowercase alphabetic
	LOWER,
	// Numeric and some punctuation
	MIXED,
	// Punctuation
	PUNCT,
	// Temporary shift to uppercase: applies to the next value only
	ALPHA_SHIFT,
	// Temporary shift to punctuation: applies to the next value only
	PUNCT_SHIFT
};

// Function codewords. Every codeword value >= TEXT_COMPACTION_MODE_LATCH (900) is treated by the
// compaction routines below as a function codeword rather than data.
constexpr int TEXT_COMPACTION_MODE_LATCH = 900;
constexpr int BYTE_COMPACTION_MODE_LATCH = 901;
constexpr int NUMERIC_COMPACTION_MODE_LATCH = 902;
// 903-912 reserved
constexpr int MODE_SHIFT_TO_BYTE_COMPACTION_MODE = 913;
// 914-917 reserved
constexpr int LINKAGE_OTHER = 918;
// 919 reserved
constexpr int LINKAGE_EANUCC = 920; // GS1 Composite
constexpr int READER_INIT = 921; // Reader Initialisation/Programming
constexpr int MACRO_PDF417_TERMINATOR = 922;
constexpr int BEGIN_MACRO_PDF417_OPTIONAL_FIELD = 923;
constexpr int BYTE_COMPACTION_MODE_LATCH_6 = 924;
constexpr int ECI_USER_DEFINED = 925; // 810900-811799 (1 codeword)
constexpr int ECI_GENERAL_PURPOSE = 926; // 900-810899 (2 codewords)
constexpr int ECI_CHARSET = 927; // 0-899 (1 codeword)
constexpr int BEGIN_MACRO_PDF417_CONTROL_BLOCK = 928;

// Largest number of codewords NumericCompaction converts from base 900 to base 10 in one group
constexpr int MAX_NUMERIC_CODEWORDS = 15;

// Field designators: the codeword following BEGIN_MACRO_PDF417_OPTIONAL_FIELD selects one of these
// (see the inner switch in DecodeMacroBlock)
constexpr int MACRO_PDF417_OPTIONAL_FIELD_FILE_NAME = 0;
constexpr int MACRO_PDF417_OPTIONAL_FIELD_SEGMENT_COUNT = 1;
constexpr int MACRO_PDF417_OPTIONAL_FIELD_TIME_STAMP = 2;
constexpr int MACRO_PDF417_OPTIONAL_FIELD_SENDER = 3;
constexpr int MACRO_PDF417_OPTIONAL_FIELD_ADDRESSEE = 4;
constexpr int MACRO_PDF417_OPTIONAL_FIELD_FILE_SIZE = 5;
constexpr int MACRO_PDF417_OPTIONAL_FIELD_CHECKSUM = 6;

// Character tables for the Punctuation and Mixed Text Compaction sub-modes, indexed by sub-mode value.
// Declared as constexpr arrays (rather than reassignable pointers) so that their lengths can be verified
// at compile time against the index bounds used by DecodeTextCompaction (values < 29 and < 25).
static constexpr char PUNCT_CHARS[] = ";<>@[\\]_`~!\r\t,:\n-.$/\"|*()?{}'";
static constexpr char MIXED_CHARS[] = "0123456789&\r\t,:#-.$/+%*=^";
static_assert(sizeof(PUNCT_CHARS) - 1 == 29, "PUNCT_CHARS must provide a character for sub-mode values 0-28");
static_assert(sizeof(MIXED_CHARS) - 1 == 25, "MIXED_CHARS must provide a character for sub-mode values 0-24");

// Number of codewords at the start of a Macro PDF417 control block that hold the segment index
// (see DecodeMacroBlock)
constexpr int NUMBER_OF_SEQUENCE_CODEWORDS = 2;

inline bool IsECI(int code)
{
	return code >= ECI_USER_DEFINED && code <= ECI_CHARSET;
}

/**
* Tells whether a codeword ends the Compaction mode currently in effect, i.e. whether it is a
* mode latch or one of the Macro PDF417 codewords.
*
* See ISO/IEC 15438:2015 5.4.2.5 (Text), 5.4.3.4 (Byte), 5.4.4.3 (Numeric)
*/
static bool TerminatesCompaction(int code)
{
	const bool isModeLatch = code == TEXT_COMPACTION_MODE_LATCH || code == BYTE_COMPACTION_MODE_LATCH
							 || code == NUMERIC_COMPACTION_MODE_LATCH || code == BYTE_COMPACTION_MODE_LATCH_6;
	const bool isMacroCodeword = code == BEGIN_MACRO_PDF417_CONTROL_BLOCK || code == BEGIN_MACRO_PDF417_OPTIONAL_FIELD
								 || code == MACRO_PDF417_TERMINATOR;

	return isModeLatch || isMacroCodeword;
}

/**
* Consume the parameter codeword(s) of an ECI whose introducer `code` has already been read.
*
* @param codewords The codeword array being read.
* @param codeIndex Index of the first ECI parameter codeword.
* @param length    One past the last readable index of `codewords`.
* @param code      The ECI introducer (925, 926 or 927).
* @param result    Content whose encoding is switched for character set ECIs.
* @return The index following the ECI parameters. This can exceed `length` for a truncated
*         general purpose ECI; `codeIndex` is returned unchanged when there is nothing to consume.
**/
static int ProcessECI(const std::vector<int>& codewords, int codeIndex, const int length, const int code, Content& result)
{
	// Nothing to do if no parameter codeword is available or `code` is not an ECI introducer
	if (codeIndex >= length || !IsECI(code))
		return codeIndex;

	switch (code) {
	case ECI_CHARSET:
		result.switchEncoding(ECI(codewords[codeIndex]));
		return codeIndex + 1;
	case ECI_GENERAL_PURPOSE:
		// Non-character set ECIs are not currently handled, so just skip the 2 parameter codewords
		return codeIndex + 2;
	default:
		// ECI_USER_DEFINED: likewise ignored, 1 parameter codeword
		return codeIndex + 1;
	}
}

/**
* The Text Compaction mode includes all the printable ASCII characters
* (i.e. values from 32 to 126) and three ASCII control characters: HT or tab
* (ASCII value 9), LF or line feed (ASCII value 10), and CR or carriage
* return (ASCII value 13). The Text Compaction mode also includes various latch
* and shift characters which are used exclusively within the mode. The Text
* Compaction mode encodes up to 2 characters per codeword. The compaction rules
* for converting data into PDF417 codewords are defined in 5.4.2.2. The sub-mode
* switches are defined in 5.4.2.3.
*
* @param textCompactionData The text compaction data.
* @param length             The size of the text compaction data.
* @param result             The data in the character set encoding.
*/
static void DecodeTextCompaction(const std::vector<int>& textCompactionData, int length, Content& result)
{
	// Beginning from an initial state of the Alpha sub-mode
	// The default compaction mode for PDF417 in effect at the start of each symbol shall always be Text
	// Compaction mode Alpha sub-mode (uppercase alphabetic). A latch codeword from another mode to the Text
	// Compaction mode shall always switch to the Text Compaction Alpha sub-mode.
	Mode subMode = Mode::ALPHA;
	Mode priorToShiftMode = Mode::ALPHA;
	int i = 0;
	while (i < length) {
		int subModeCh = textCompactionData[i];

		// Note only have ECI and MODE_SHIFT_TO_BYTE_COMPACTION_MODE function codewords in text compaction array
		if (IsECI(subModeCh)) {
			i = ProcessECI(textCompactionData, i + 1, length, subModeCh, result);
			continue;
		}
		if (subModeCh == MODE_SHIFT_TO_BYTE_COMPACTION_MODE) {
			i++;
			while (i < length && IsECI(textCompactionData[i]))
				i = ProcessECI(textCompactionData, i + 1, length, textCompactionData[i], result);

			if (i < length)
				result.push_back((uint8_t)textCompactionData[i++]);

			continue;
		}

		char ch = 0;
		switch (subMode) {
		case Mode::ALPHA:
		case Mode::LOWER:
			// Alpha (uppercase alphabetic) or Lower (lowercase alphabetic)
			if (subModeCh < 26) {
				// Upper/lowercase character
				ch = (char)((subMode == Mode::ALPHA ? 'A' : 'a') + subModeCh);
			} else if (subModeCh == 26) { // Space
				ch = ' ';
			} else if (subModeCh == 27 && subMode == Mode::ALPHA) { // LL
				subMode = Mode::LOWER;
			} else if (subModeCh == 27 && subMode == Mode::LOWER) { // AS
				// Shift to alpha
				priorToShiftMode = subMode;
				subMode = Mode::ALPHA_SHIFT;
			} else if (subModeCh == 28) { // ML
				subMode = Mode::MIXED;
			}
			// 29 PS - ignore if last or followed by Shift to Byte, 5.4.2.4 (b) (1)
			else if (i + 1 < length && textCompactionData[i + 1] != MODE_SHIFT_TO_BYTE_COMPACTION_MODE) {
				// Shift to punctuation
				priorToShiftMode = subMode;
				subMode = Mode::PUNCT_SHIFT;
			}
			break;

		case Mode::MIXED:
			// Mixed (numeric and some punctuation)
			if (subModeCh < 25) {
				ch = MIXED_CHARS[subModeCh];
			} else if (subModeCh == 25) { // PL
				subMode = Mode::PUNCT;
			} else if (subModeCh == 26) { // Space
				ch = ' ';
			} else if (subModeCh == 27) { // LL
				subMode = Mode::LOWER;
			} else if (subModeCh == 28) { // AL
				subMode = Mode::ALPHA;
			}
			// 29 PS - ignore if last or followed by Shift to Byte, 5.4.2.4 (b) (1)
			else if (i + 1 < length && textCompactionData[i + 1] != MODE_SHIFT_TO_BYTE_COMPACTION_MODE) {
				// Shift to punctuation
				priorToShiftMode = subMode;
				subMode = Mode::PUNCT_SHIFT;
			}
			break;

		case Mode::PUNCT:
			// Punctuation
			if (subModeCh < 29)
				ch = PUNCT_CHARS[subModeCh];
			else // 29 AL - note not ignored if followed by Shift to Byte, 5.4.2.4 (b) (2)
				subMode = Mode::ALPHA;
			break;

		case Mode::ALPHA_SHIFT:
			// Restore sub-mode
			subMode = priorToShiftMode;
			if (subModeCh < 26)
				ch = (char)('A' + subModeCh);
			else if (subModeCh == 26) // Space
				ch = ' ';
			// 27 LL, 28 ML, 29 PS used as padding
			break;

		case Mode::PUNCT_SHIFT:
			// Restore sub-mode
			subMode = priorToShiftMode;
			if (subModeCh < 29)
				ch = PUNCT_CHARS[subModeCh];
			else // 29 AL
				subMode = Mode::ALPHA;
			break;
		}
		if (ch != 0)
			result.push_back(ch); // Append decoded character to result
		i++;
	}
}

/*
* Copy an ECI introducer and as many of its parameter codewords as are available into the Text
* Compaction array, so that DecodeTextCompaction can apply the ECI at the right position.
*
* `index` is advanced past everything written; the return value is the next index into `codewords`.
*/
static int ProcessTextECI(std::vector<int>& textCompactionData, int& index, const std::vector<int>& codewords, int codeIndex,
						  const int code)
{
	const int symbolLength = codewords[0];
	// General purpose ECIs carry 2 parameter codewords, the others 1
	int paramsLeft = code == ECI_GENERAL_PURPOSE ? 2 : 1;

	textCompactionData[index++] = code;
	for (; paramsLeft > 0 && codeIndex < symbolLength; --paramsLeft)
		textCompactionData[index++] = codewords[codeIndex++];

	return codeIndex;
}

/**
* Text Compaction mode (see 5.4.1.5) permits all printable ASCII characters to be
* encoded, i.e. values 32 - 126 inclusive in accordance with ISO/IEC 646 (IRV), as
* well as selected control characters.
*
* Codewords are first unpacked into an array of sub-mode values (two per data codeword, with ECI and
* Shift to Byte codewords copied through), which DecodeTextCompaction then converts to characters.
* Reading stops at the end of the data or at a codeword that terminates the compaction mode; any
* other function codeword raises a FormatError.
*
* @param codewords     The array of codewords (data + error)
* @param codeIndex     The current index into the codeword array.
* @param result        The data in the character set encoding.
* @return The next index into the codeword array (pointing at the terminating codeword, if any).
*/
static int TextCompaction(const std::vector<int>& codewords, int codeIndex, Content& result)
{
	const int symbolLength = codewords[0];

	// Each codeword contributes at most 2 entries
	std::vector<int> subModeValues((symbolLength - codeIndex) * 2, 0);
	int used = 0;

	while (codeIndex < symbolLength) {
		const int code = codewords[codeIndex++];

		if (code < TEXT_COMPACTION_MODE_LATCH) {
			// Data codeword: high and low base-30 digits
			subModeValues[used++] = code / 30;
			subModeValues[used++] = code % 30;
			continue;
		}

		if (code == MODE_SHIFT_TO_BYTE_COMPACTION_MODE) {
			// Codeword 913 switches temporarily from Text to Byte Compaction for the next codeword
			// only, after which the prevailing Text sub-mode resumes. It is only available in Text
			// Compaction mode; its use is described in 5.4.2.4.
			subModeValues[used++] = MODE_SHIFT_TO_BYTE_COMPACTION_MODE;
			// 5.5.3.1 allows ECIs anywhere in Text Compaction, including after a Shift to Byte
			while (codeIndex < symbolLength && IsECI(codewords[codeIndex]))
				codeIndex = ProcessTextECI(subModeValues, used, codewords, codeIndex + 1, codewords[codeIndex]);

			if (codeIndex < symbolLength)
				subModeValues[used++] = codewords[codeIndex++]; // Byte to shift
			continue;
		}

		if (IsECI(code)) {
			codeIndex = ProcessTextECI(subModeValues, used, codewords, codeIndex, code);
			continue;
		}

		if (!TerminatesCompaction(code))
			throw FormatError();

		// Leave the terminating codeword for the caller to handle
		--codeIndex;
		break;
	}

	DecodeTextCompaction(subModeValues, used, result);
	return codeIndex;
}

/*
* Byte Compaction look-ahead: counts the data codewords up to the end of the data or the next
* terminating codeword (skipping ECIs) and reports how they split into full 5-codeword batches and
* trailing single-byte codewords.
*
* Returns the number of 5-codeword batches; `trailingCount` receives the number of trailing bytes.
* Throws FormatError for a function codeword that neither is an ECI nor terminates the mode, for an
* ECI whose parameters run past the end of the data, and in mode 924 for data that is not made up
* of whole batches.
*/
static int CountByteBatches(int mode, const std::vector<int>& codewords, int codeIndex, int& trailingCount)
{
	const int symbolLength = codewords[0];
	const bool isLatch6 = mode == BYTE_COMPACTION_MODE_LATCH_6;

	int dataCount = 0;
	trailingCount = 0;

	while (codeIndex < symbolLength) {
		const int code = codewords[codeIndex++];
		if (code < TEXT_COMPACTION_MODE_LATCH) {
			++dataCount;
			continue;
		}

		// Function codeword: in mode 924 it may only appear on a batch boundary
		if (isLatch6 && dataCount % 5 != 0)
			throw FormatError();

		if (!IsECI(code)) {
			if (!TerminatesCompaction(code))
				throw FormatError();
			break;
		}

		// Skip the ECI parameter codeword(s)
		codeIndex += code == ECI_GENERAL_PURPOSE ? 2 : 1;
	}

	// A truncated ECI pushed the index past the end of the data
	if (codeIndex > symbolLength)
		throw FormatError();

	if (dataCount == 0)
		return 0;

	if (mode != BYTE_COMPACTION_MODE_LATCH) { // BYTE_COMPACTION_MODE_LATCH_6
		if (dataCount % 5 != 0)
			throw FormatError();
		return dataCount / 5;
	}

	// Mode 901: a final group of exactly 5 codewords is treated as 5 trailing bytes, not a batch
	const int remainder = dataCount % 5;
	if (remainder == 0) {
		trailingCount = 5;
		return (dataCount - 5) / 5;
	}

	trailingCount = remainder;
	return dataCount / 5;
}

/*
* Byte Compaction helper: consumes consecutive function codewords starting at `codeIndex`, applying
* any ECIs to `result`, and stops at the first data codeword, terminating codeword or end of data.
* Returns the index at which it stopped.
*/
static int ProcessByteECIs(const std::vector<int>& codewords, int codeIndex, Content& result)
{
	const int symbolLength = codewords[0];

	for (;;) {
		if (codeIndex >= symbolLength)
			break;

		const int code = codewords[codeIndex];
		if (code < TEXT_COMPACTION_MODE_LATCH || TerminatesCompaction(code))
			break;

		++codeIndex;
		if (IsECI(code))
			codeIndex = ProcessECI(codewords, codeIndex, symbolLength, code, result);
	}

	return codeIndex;
}

/**
* Byte Compaction mode (see 5.4.3) permits all 256 possible 8-bit byte values to be encoded.
* This includes all ASCII characters value 0 to 127 inclusive and provides for international
* character set support.
*
* Full batches of 5 codewords are converted from base 900 into 6 bytes each; the remaining
* codewords each yield a single byte. ECIs are processed before, between and after these units.
*
* @param mode          The byte compaction mode i.e. 901 or 924
* @param codewords     The array of codewords (data + error)
* @param codeIndex     The current index into the codeword array.
* @param result        The data in the character set encoding.
* @return The next index into the codeword array.
*/
static int ByteCompaction(int mode, const std::vector<int>& codewords, int codeIndex, Content& result)
{
	// Look ahead to find out how the data splits into batches and trailing bytes
	int trailingCount = 0;
	const int batchCount = CountByteBatches(mode, codewords, codeIndex, trailingCount);

	// ECIs preceding the data
	codeIndex = ProcessByteECIs(codewords, codeIndex, result);

	for (int remaining = batchCount; remaining > 0; --remaining) {
		// 5 base-900 digits, most significant first
		int64_t packed = 0;
		for (int digit = 0; digit < 5; ++digit)
			packed = 900 * packed + codewords[codeIndex++];

		// Emit the 6 bytes, most significant first
		for (int shift = 40; shift >= 0; shift -= 8)
			result.push_back(static_cast<uint8_t>(packed >> shift));

		// ECIs between batches
		codeIndex = ProcessByteECIs(codewords, codeIndex, result);
	}

	for (int remaining = trailingCount; remaining > 0; --remaining) {
		result.push_back(static_cast<uint8_t>(codewords[codeIndex]));
		++codeIndex;
		// ECIs between bytes
		codeIndex = ProcessByteECIs(codewords, codeIndex, result);
	}

	return codeIndex;
}


/**
* Convert a list of Numeric Compacted codewords from Base 900 to Base 10.
*
* @param codewords The array of codewords
* @param count     The number of codewords
* @return The decoded string representing the Numeric data.
*/
/*
EXAMPLE
Encode the fifteen digit numeric string 000213298174000
Prefix the numeric string with a 1 and set the initial value of
t = 1 000 213 298 174 000
Calculate codeword 0
d0 = 1 000 213 298 174 000 mod 900 = 200

t = 1 000 213 298 174 000 div 900 = 1 111 348 109 082
Calculate codeword 1
d1 = 1 111 348 109 082 mod 900 = 282

t = 1 111 348 109 082 div 900 = 1 234 831 232
Calculate codeword 2
d2 = 1 234 831 232 mod 900 = 632

t = 1 234 831 232 div 900 = 1 372 034
Calculate codeword 3
d3 = 1 372 034 mod 900 = 434

t = 1 372 034 div 900 = 1 524
Calculate codeword 4
d4 = 1 524 mod 900 = 624

t = 1 524 div 900 = 1
Calculate codeword 5
d5 = 1 mod 900 = 1
t = 1 div 900 = 0
Codeword sequence is: 1, 624, 434, 632, 282, 200

Decode the above codewords involves
1 x 900 power of 5 + 624 x 900 power of 4 + 434 x 900 power of 3 +
632 x 900 power of 2 + 282 x 900 power of 1 + 200 x 900 power of 0 = 1000213298174000

Remove leading 1 =>  Result is 000213298174000
*/
static std::string DecodeBase900toBase10(const std::vector<int>& codewords, int endIndex, int count)
{
	// Table containing values for the exponent of 900.
	static const auto EXP900 = []() {
		std::array<BigInteger, 16> table = {1, 900};
		for (size_t i = 2; i < table.size(); ++i)
			table[i] = table[i - 1] * 900;
		return table;
	}();

	assert(count <= 16);

	BigInteger result;
	for (int i = 0; i < count; i++)
		result += EXP900[count - i - 1] * codewords[endIndex - count + i];

	std::string resultString = result.toString();
	if (!resultString.empty() && resultString.front() == '1')
		return resultString.substr(1);

	throw FormatError();
}


/**
* Numeric Compaction mode (see 5.4.4) permits efficient encoding of numeric data strings.
*
* Data codewords are collected into groups of at most MAX_NUMERIC_CODEWORDS; a group is converted
* to digits as soon as it is full, the data ends, or a function codeword is met.
*
* @param codewords The array of codewords (data + error)
* @param codeIndex The current index into the codeword array.
* @param result    The decoded data is appended to the result.
* @return The next index into the codeword array (pointing at the terminating codeword, if any).
* @throws FormatError on a function codeword that neither is an ECI nor terminates the mode.
*/
static int NumericCompaction(const std::vector<int>& codewords, int codeIndex, Content& result)
{
	const int symbolLength = codewords[0];
	int groupSize = 0;

	while (codeIndex < symbolLength) {
		const int code = codewords[codeIndex];
		const bool isFunctionCodeword = code >= TEXT_COMPACTION_MODE_LATCH;

		if (!isFunctionCodeword) {
			++groupSize;
			++codeIndex;
		}

		const bool groupComplete = groupSize == MAX_NUMERIC_CODEWORDS || codeIndex == symbolLength || isFunctionCodeword;
		if (groupSize > 0 && groupComplete) {
			result += DecodeBase900toBase10(codewords, codeIndex, groupSize);
			groupSize = 0;
		}

		if (!isFunctionCodeword)
			continue;

		if (IsECI(code)) {
			// As operating in Basic Channel Mode (i.e. not embedding backslashed ECIs and doubling backslashes)
			// allow ECIs anywhere in Numeric Compaction (i.e. ISO/IEC 15438:2015 5.5.3.4 doesn't apply).
			codeIndex = ProcessECI(codewords, codeIndex + 1, symbolLength, code, result);
			continue;
		}

		if (!TerminatesCompaction(code))
			throw FormatError();

		// Leave the terminating codeword for the caller to handle
		break;
	}

	return codeIndex;
}

/*
* Helper to deal with optional text fields in Macros.
*/
static int DecodeMacroOptionalTextField(const std::vector<int>& codewords, int codeIndex, std::string& field)
{
	Content result;
	// Each optional field begins with an implied reset to ECI 2 (Annex H.2.3). ECI 2 is ASCII for 0-127, and Cp437
	// for non-ASCII (128-255). Text optional fields can contain ECIs.
	result.defaultCharset = CharacterSet::Cp437;

	codeIndex = TextCompaction(codewords, codeIndex, result);

	// Converting to UTF-8 (backward-incompatible change for non-ASCII chars)
	field = result.utf8();

	return codeIndex;
}

/*
* Decodes a Macro optional field that holds a number: runs Numeric Compaction from `codeIndex`,
* parses the decoded digits into `field` and returns the next index into the codeword array.
*
* The digits are parsed with std::stoull so that the whole range of the unsigned 64-bit `field` is
* accepted (std::stoll would reject values above LLONG_MAX). As before, std::invalid_argument is
* thrown if no digits were decoded and std::out_of_range if the value does not fit.
*/
static int DecodeMacroOptionalNumericField(const std::vector<int>& codewords, int codeIndex, uint64_t& field)
{
	Content result;
	// Each optional field begins with an implied reset to ECI 2 (Annex H.2.3). ECI 2 is ASCII for 0-127, and Cp437
	// for non-ASCII (128-255).
	result.defaultCharset = CharacterSet::Cp437;

	codeIndex = NumericCompaction(codewords, codeIndex, result);

	field = std::stoull(result.utf8());

	return codeIndex;
}

/*
* Decodes a Macro PDF417 control block starting at `codeIndex` (the codeword after 928) and running
* to the end of the data: the segment index, the file id, any optional fields and the terminator.
* The decoded values are stored in `resultMetadata`; the return value is the next index into the
* codeword array. Throws FormatError if the block is too short or contains an unexpected codeword.
*/
ZXING_EXPORT_TEST_ONLY
int DecodeMacroBlock(const std::vector<int>& codewords, int codeIndex, DecoderResultExtra& resultMetadata)
{
	const int symbolLength = codewords[0];

	// The segment index needs at least two codewords
	if (codeIndex + NUMBER_OF_SEQUENCE_CODEWORDS > symbolLength)
		throw FormatError();

	codeIndex += NUMBER_OF_SEQUENCE_CODEWORDS;
	const std::string segmentIndexDigits = DecodeBase900toBase10(codewords, codeIndex, NUMBER_OF_SEQUENCE_CODEWORDS);
	resultMetadata.setSegmentIndex(std::stoi(segmentIndexDigits));

	// Decoding the fileId codewords as 0-899 numbers, each 0-filled to width 3. This follows the spec
	// (See ISO/IEC 15438:2015 Annex H.6) and preserves all info, but some generators (e.g. TEC-IT) write
	// the fileId using text compaction, so in those cases the fileId will appear mangled.
	std::ostringstream fileId;
	while (codeIndex < symbolLength) {
		const int codeword = codewords[codeIndex];
		if (codeword == MACRO_PDF417_TERMINATOR || codeword == BEGIN_MACRO_PDF417_OPTIONAL_FIELD)
			break;
		fileId << ToString(codeword, 3);
		++codeIndex;
	}
	resultMetadata.setFileId(fileId.str());

	// Remember where the optional fields start (just after the first 923), -1 if there are none
	const bool hasOptionalFields = codeIndex < symbolLength && codewords[codeIndex] == BEGIN_MACRO_PDF417_OPTIONAL_FIELD;
	const int optionalFieldsStart = hasOptionalFields ? codeIndex + 1 : -1;

	// Field readers: `codeIndex` is on the field designator when called and past the field afterwards
	const auto readTextField = [&]() {
		std::string text;
		codeIndex = DecodeMacroOptionalTextField(codewords, codeIndex + 1, text);
		return text;
	};
	const auto readNumericField = [&]() {
		uint64_t number = 0;
		codeIndex = DecodeMacroOptionalNumericField(codewords, codeIndex + 1, number);
		return number;
	};

	while (codeIndex < symbolLength) {
		const int marker = codewords[codeIndex];

		if (marker == MACRO_PDF417_TERMINATOR) {
			++codeIndex;
			resultMetadata.setLastSegment(true);
			continue;
		}
		if (marker != BEGIN_MACRO_PDF417_OPTIONAL_FIELD)
			throw FormatError();

		// Step over the 923; if it was the very last codeword there is no field to decode
		if (++codeIndex >= symbolLength)
			break;

		switch (codewords[codeIndex]) {
		case MACRO_PDF417_OPTIONAL_FIELD_FILE_NAME: resultMetadata.setFileName(readTextField()); break;
		case MACRO_PDF417_OPTIONAL_FIELD_SENDER: resultMetadata.setSender(readTextField()); break;
		case MACRO_PDF417_OPTIONAL_FIELD_ADDRESSEE: resultMetadata.setAddressee(readTextField()); break;
		case MACRO_PDF417_OPTIONAL_FIELD_SEGMENT_COUNT:
			resultMetadata.setSegmentCount(narrow_cast<int>(readNumericField()));
			break;
		case MACRO_PDF417_OPTIONAL_FIELD_TIME_STAMP: resultMetadata.setTimestamp(readNumericField()); break;
		case MACRO_PDF417_OPTIONAL_FIELD_CHECKSUM:
			resultMetadata.setChecksum(narrow_cast<int>(readNumericField()));
			break;
		case MACRO_PDF417_OPTIONAL_FIELD_FILE_SIZE: resultMetadata.setFileSize(readNumericField()); break;
		default: throw FormatError();
		}
	}

	// Keep a raw copy of the optional field codewords
	if (optionalFieldsStart != -1) {
		int optionalFieldsLength = codeIndex - optionalFieldsStart;
		if (resultMetadata.isLastSegment())
			--optionalFieldsLength; // do not include terminator

		const auto first = codewords.begin() + optionalFieldsStart;
		resultMetadata.setOptionalData(std::vector<int>(first, first + optionalFieldsLength));
	}

	return codeIndex;
}

/**
* Decodes the data codewords of a PDF417 symbol.
*
* @param codewords The codewords of the symbol. codewords[0] is the symbol length descriptor: only
*                  indices 1 .. codewords[0] - 1 are interpreted, anything beyond is ignored.
* @return The decoded content together with structured append (Macro PDF417) information, the
*         reader initialisation flag and the macro metadata; or an error result if the codewords
*         are malformed, use an unsupported feature, or yield neither content nor a macro segment
*         index.
*/
DecoderResult Decode(const std::vector<int>& codewords)
{
	// codewords[0] bounds every index read below, so it must exist and must not exceed the number of
	// codewords actually supplied - otherwise malformed input would cause out-of-bounds reads.
	if (codewords.empty() || codewords[0] > static_cast<int>(codewords.size()))
		return FormatError("Invalid symbol length descriptor");

	Content result;
	result.symbology = {'L', '2', char(-1)};

	bool readerInit = false;
	auto resultMetadata = std::make_shared<DecoderResultExtra>();

	try {
		for (int codeIndex = 1; codeIndex < codewords[0];) {
			int code = codewords[codeIndex++];
			switch (code) {
			case TEXT_COMPACTION_MODE_LATCH: codeIndex = TextCompaction(codewords, codeIndex, result); break;
				// This should only be encountered once in this loop, when default Text Compaction mode applies
				// (see default case below)
			case MODE_SHIFT_TO_BYTE_COMPACTION_MODE: codeIndex = TextCompaction(codewords, codeIndex - 1, result); break;
			case BYTE_COMPACTION_MODE_LATCH:
			case BYTE_COMPACTION_MODE_LATCH_6: codeIndex = ByteCompaction(code, codewords, codeIndex, result); break;
			case NUMERIC_COMPACTION_MODE_LATCH: codeIndex = NumericCompaction(codewords, codeIndex, result); break;
			case ECI_CHARSET:
			case ECI_GENERAL_PURPOSE:
			case ECI_USER_DEFINED: codeIndex = ProcessECI(codewords, codeIndex, codewords[0], code, result); break;
			case BEGIN_MACRO_PDF417_CONTROL_BLOCK: codeIndex = DecodeMacroBlock(codewords, codeIndex, *resultMetadata); break;
			case BEGIN_MACRO_PDF417_OPTIONAL_FIELD:
			case MACRO_PDF417_TERMINATOR:
				// Should not see these outside a macro block
				throw FormatError();
			case READER_INIT:
				if (codeIndex != 2) // Must be first codeword after symbol length (ISO/IEC 15438:2015 5.4.1.4)
					throw FormatError();
				readerInit = true;
				break;
			case LINKAGE_EANUCC:
				if (codeIndex != 2) // Must be first codeword after symbol length (GS1 Composite ISO/IEC 24723:2010 4.3)
					throw FormatError();
				// TODO: handle else case
				break;
			case LINKAGE_OTHER:
				// Allowed to treat as invalid by ISO/IEC 24723:2010 5.4.1.5 and 5.4.6.1 when in Basic Channel Mode
				throw UnsupportedError("LINKAGE_OTHER, see ISO/IEC 15438:2015 5.4.1.5");
			default:
				if (code >= TEXT_COMPACTION_MODE_LATCH) { // Reserved codewords (all others in switch)
					// Allowed to treat as invalid by ISO/IEC 24723:2010 5.4.6.1 when in Basic Channel Mode
					throw UnsupportedError("Reserved codeword, see ISO/IEC 15438:2015 5.4.6.1");
				}
				// Default mode is Text Compaction mode Alpha sub-mode (ISO/IEC 15438:2015 5.4.2.1)
				codeIndex = TextCompaction(codewords, codeIndex - 1, result);
				break;
			}
		}
	} catch (const std::exception& e) {
		// Standard library failures (e.g. from number parsing) are reported as format errors
		return FormatError(e.what());
	} catch (const Error& e) {
		return e;
	}

	// Nothing decoded and not part of a Macro PDF417 sequence either
	if (result.empty() && resultMetadata->segmentIndex() == -1)
		return FormatError();

	StructuredAppendInfo sai;
	if (resultMetadata->segmentIndex() > -1) {
		// Use the explicit segment count if present; otherwise it is only known for the last segment
		sai.count = resultMetadata->segmentCount() != -1
						? resultMetadata->segmentCount()
						: (resultMetadata->isLastSegment() ? resultMetadata->segmentIndex() + 1 : 0);
		sai.index = resultMetadata->segmentIndex();
		sai.id    = resultMetadata->fileId();
	}

	return DecoderResult(std::move(result))
		.setStructuredAppend(sai)
		.setReaderInit(readerInit)
		.setExtra(resultMetadata);
}

} // namespace ZXing::Pdf417