QRCode/datamatrix/DMHighLevelEncoder.cpp

/*
* Copyright 2016 Huy Cuong Nguyen
* Copyright 2016 ZXing authors
*/
// SPDX-License-Identifier: Apache-2.0

#include "DMHighLevelEncoder.h"

#include "ByteArray.h"
#include "CharacterSet.h"
#include "DMEncoderContext.h"
#include "TextEncoder.h"
#include "ZXAlgorithms.h"

#include <algorithm>
#include <array>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <limits>
#include <stdexcept>
#include <string>

namespace ZXing::DataMatrix {

// Special codeword values emitted by the encoders below.
static constexpr uint8_t PAD = 129;         // first padding codeword appended by Encode()
static constexpr uint8_t UPPER_SHIFT = 235; // written by the ASCII encoder before an extended ASCII character
static constexpr uint8_t MACRO_05 = 236;    // replaces a recognised "05" macro header
static constexpr uint8_t MACRO_06 = 237;    // replaces a recognised "06" macro header
static constexpr uint8_t C40_UNLATCH = 254; // leaves C40/Text encodation
static constexpr uint8_t X12_UNLATCH = 254; // leaves X12 encodation (same value as C40_UNLATCH)

// Encodation modes. The numeric values are used as array indices (LATCHES and the
// per-mode cost arrays in LookAheadTest), so the order must stay as it is.
enum
{
	ASCII_ENCODATION = 0,
	C40_ENCODATION = 1,
	TEXT_ENCODATION = 2,
	X12_ENCODATION = 3,
	EDIFACT_ENCODATION = 4,
	BASE256_ENCODATION = 5,
};

// Latch codewords, indexed by encodation mode (same order as the *_ENCODATION constants).
static constexpr uint8_t LATCHES[] = {
	0,   // ASCII: default mode, no latch needed
	230, // latch to C40
	239, // latch to Text
	238, // latch to ANSI X12
	240, // latch to EDIFACT
	231, // latch to Base 256
};

// True when ch is an ASCII decimal digit ('0'..'9').
static bool IsDigit(int ch)
{
	return '0' <= ch && ch <= '9';
}

// True when ch lies in the upper half of the byte range (128..255).
static bool IsExtendedASCII(int ch)
{
	return !(ch < 128 || ch > 255);
}

// True when ch is in the set LookAheadTest treats as native to C40:
// space, digits and upper-case letters.
static bool IsNativeC40(int ch)
{
	if (ch == ' ') {
		return true;
	}
	const bool isDigit = ch >= '0' && ch <= '9';
	const bool isUpper = ch >= 'A' && ch <= 'Z';
	return isDigit || isUpper;
}

// True when ch is in the set LookAheadTest treats as native to Text encodation:
// space, digits and lower-case letters.
static bool IsNativeText(int ch)
{
	if (ch == ' ') {
		return true;
	}
	const bool isDigit = ch >= '0' && ch <= '9';
	const bool isLower = ch >= 'a' && ch <= 'z';
	return isDigit || isLower;
}

// True for the three X12 terminator/separator characters: CR, '*' and '>'.
static bool IsX12TermSep(int ch)
{
	switch (ch) {
	case '\r': //CR
	case '*':
	case '>':
		return true;
	default:
		return false;
	}
}

// True when ch can be encoded in X12 mode: the terminator/separator characters,
// space, digits and upper-case letters.
static bool IsNativeX12(int ch)
{
	if (IsX12TermSep(ch) || ch == ' ') {
		return true;
	}
	return (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z');
}

// True when ch lies in the range ' '..'^' (32..94), the range accepted by the EDIFACT encoder.
static bool IsNativeEDIFACT(int ch)
{
	return !(ch < ' ' || ch > '^');
}

// Placeholder predicate used by step Q of LookAheadTest to decide whether a character
// gets the higher Base 256 cost. It currently reports false for every input, so the
// higher-cost branch in LookAheadTest is never taken.
static bool IsSpecialB256(int /*ch*/)
{
	return false; //TODO NOT IMPLEMENTED YET!!!
}


/*
* Converts the message to a byte array using the default encoding (cp437) as defined by the
* specification
*
* @param msg the message
* @return the byte array of the message
*/

/*
public static byte[] getBytesForMessage(String msg) {
return msg.getBytes(Charset.forName("cp437")); //See 4.4.3 and annex B of ISO/IEC 15438:2001(E)
}
*/

// Adds a position-dependent pseudo-random offset (1..253) to ch and wraps results above
// 254 back by 254. Encode() applies this to every pad codeword after the first.
static uint8_t Randomize253State(uint8_t ch, int codewordPosition)
{
	const int pseudoRandom = (149 * codewordPosition) % 253 + 1;
	int randomized = ch + pseudoRandom;
	if (randomized > 254) {
		randomized -= 254;
	}
	return narrow_cast<uint8_t>(randomized);
}

// Finds the smallest value among `min` and the six entries of intCharCounts.
// On return, mins[i] is 1 for every entry equal to that smallest value and 0 otherwise
// (all zeros when the incoming `min` is smaller than every entry).
// Returns the smallest value found.
static int FindMinimums(const std::array<int, 6>& intCharCounts, int min, std::array<int, 6>& mins)
{
	mins.fill(0);
	for (size_t mode = 0; mode < intCharCounts.size(); ++mode) {
		const int count = intCharCounts[mode];
		if (count < min) {
			// New strict minimum: earlier flags no longer apply.
			min = count;
			mins.fill(0);
		}
		if (count == min) {
			++mins[mode];
		}
	}
	return min;
}

/*
* Look-ahead test: decides which encodation mode to use for the message from startpos on.
*
* Walks the message byte by byte and accumulates, for each of the six modes, an estimated
* codeword cost in charCounts (indexed by the *_ENCODATION constants). From the fourth
* examined character on, and at the end of the message, the rounded-up costs are compared
* and a mode is returned as soon as one of the rules below selects it.
* The "step J".."step R" markers presumably refer to the annex P algorithm cited in the
* documentation of Encode() - confirm against the standard.
*
* @param msg          the message bytes
* @param startpos     index of the first byte to examine
* @param currentMode  the encodation mode currently in use (one of the *_ENCODATION constants)
* @return the chosen *_ENCODATION constant; currentMode if startpos is at or past the end of msg
*/
static int LookAheadTest(const std::string& msg, size_t startpos, int currentMode)
{
	if (startpos >= msg.length()) {
		return currentMode;
	}
	std::array<float, 6> charCounts;
	//step J
	// Initial costs: when starting from ASCII every other mode starts with a penalty;
	// otherwise all modes are penalised and the current mode starts at zero.
	if (currentMode == ASCII_ENCODATION) {
		charCounts = { 0, 1, 1, 1, 1, 1.25f };
	}
	else {
		charCounts = { 1, 2, 2, 2, 2, 2.25f };
		charCounts[currentMode] = 0;
	}

	std::array<int, 6> mins;
	std::array<int, 6> intCharCounts;
	int charsProcessed = 0;
	while (true) {
		//step K
		// End of message reached: pick a mode from the rounded-up costs.
		if ((startpos + charsProcessed) == msg.length()) {
			int min = std::numeric_limits<int>::max();
			std::transform(charCounts.begin(), charCounts.end(), intCharCounts.begin(),
						   [](float x) { return static_cast<int>(std::ceil(x)); });
			min = FindMinimums(intCharCounts, min, mins);
			// Number of modes that share the minimum cost.
			int minCount = Reduce(mins);

			// ASCII wins whenever it is among the cheapest modes.
			if (intCharCounts[ASCII_ENCODATION] == min) {
				return ASCII_ENCODATION;
			}
			// Otherwise a mode is chosen only if it is the single cheapest one; C40 is the fallback.
			if (minCount == 1 && mins[BASE256_ENCODATION] > 0) {
				return BASE256_ENCODATION;
			}
			if (minCount == 1 && mins[EDIFACT_ENCODATION] > 0) {
				return EDIFACT_ENCODATION;
			}
			if (minCount == 1 && mins[TEXT_ENCODATION] > 0) {
				return TEXT_ENCODATION;
			}
			if (minCount == 1 && mins[X12_ENCODATION] > 0) {
				return X12_ENCODATION;
			}
			return C40_ENCODATION;
		}

		// Read the next byte as an unsigned value (0..255).
		int c = (uint8_t)msg.at(startpos + charsProcessed);
		charsProcessed++;

		//step L
		// ASCII cost: half a codeword per digit; otherwise round up first, then add 1 (2 for extended ASCII).
		if (IsDigit(c)) {
			charCounts[ASCII_ENCODATION] += 0.5f;
		}
		else if (IsExtendedASCII(c)) {
			charCounts[ASCII_ENCODATION] = std::ceil(charCounts[ASCII_ENCODATION]);
			charCounts[ASCII_ENCODATION] += 2.0f;
		}
		else {
			charCounts[ASCII_ENCODATION] = std::ceil(charCounts[ASCII_ENCODATION]);
			charCounts[ASCII_ENCODATION] += 1.0f;
		}

		//step M
		// C40 cost
		if (IsNativeC40(c)) {
			charCounts[C40_ENCODATION] += 2.0f / 3.0f;
		}
		else if (IsExtendedASCII(c)) {
			charCounts[C40_ENCODATION] += 8.0f / 3.0f;
		}
		else {
			charCounts[C40_ENCODATION] += 4.0f / 3.0f;
		}

		//step N
		// Text cost
		if (IsNativeText(c)) {
			charCounts[TEXT_ENCODATION] += 2.0f / 3.0f;
		}
		else if (IsExtendedASCII(c)) {
			charCounts[TEXT_ENCODATION] += 8.0f / 3.0f;
		}
		else {
			charCounts[TEXT_ENCODATION] += 4.0f / 3.0f;
		}

		//step O
		// X12 cost
		if (IsNativeX12(c)) {
			charCounts[X12_ENCODATION] += 2.0f / 3.0f;
		}
		else if (IsExtendedASCII(c)) {
			charCounts[X12_ENCODATION] += 13.0f / 3.0f;
		}
		else {
			charCounts[X12_ENCODATION] += 10.0f / 3.0f;
		}

		//step P
		// EDIFACT cost
		if (IsNativeEDIFACT(c)) {
			charCounts[EDIFACT_ENCODATION] += 3.0f / 4.0f;
		}
		else if (IsExtendedASCII(c)) {
			charCounts[EDIFACT_ENCODATION] += 17.0f / 4.0f;
		}
		else {
			charCounts[EDIFACT_ENCODATION] += 13.0f / 4.0f;
		}

		// step Q
		// Base 256 cost (IsSpecialB256 currently always returns false, so this adds 1 per byte)
		if (IsSpecialB256(c)) {
			charCounts[BASE256_ENCODATION] += 4.0f;
		}
		else {
			charCounts[BASE256_ENCODATION] += 1.0f;
		}

		//step R
		// Early decision, attempted once at least four characters have been examined.
		// If none of the rules below applies, the loop continues with the next character.
		if (charsProcessed >= 4) {
			std::transform(charCounts.begin(), charCounts.end(), intCharCounts.begin(),
						   [](float x) { return static_cast<int>(std::ceil(x)); });
			FindMinimums(intCharCounts, std::numeric_limits<int>::max(), mins);
			int minCount = Reduce(mins);

			// ASCII is strictly cheaper than every other mode.
			if (intCharCounts[ASCII_ENCODATION] < intCharCounts[BASE256_ENCODATION]
				&& intCharCounts[ASCII_ENCODATION] < intCharCounts[C40_ENCODATION]
				&& intCharCounts[ASCII_ENCODATION] < intCharCounts[TEXT_ENCODATION]
				&& intCharCounts[ASCII_ENCODATION] < intCharCounts[X12_ENCODATION]
				&& intCharCounts[ASCII_ENCODATION] < intCharCounts[EDIFACT_ENCODATION]) {
				return ASCII_ENCODATION;
			}
			// Base 256 beats ASCII, or none of C40/Text/X12/EDIFACT is among the cheapest modes.
			if (intCharCounts[BASE256_ENCODATION] < intCharCounts[ASCII_ENCODATION]
				|| (mins[C40_ENCODATION] + mins[TEXT_ENCODATION] + mins[X12_ENCODATION] + mins[EDIFACT_ENCODATION]) == 0) {
				return BASE256_ENCODATION;
			}
			if (minCount == 1 && mins[EDIFACT_ENCODATION] > 0) {
				return EDIFACT_ENCODATION;
			}
			if (minCount == 1 && mins[TEXT_ENCODATION] > 0) {
				return TEXT_ENCODATION;
			}
			if (minCount == 1 && mins[X12_ENCODATION] > 0) {
				return X12_ENCODATION;
			}
			// C40 is cheaper than ASCII, Base 256, EDIFACT and Text by more than one codeword.
			if (intCharCounts[C40_ENCODATION] + 1 < intCharCounts[ASCII_ENCODATION]
				&& intCharCounts[C40_ENCODATION] + 1 < intCharCounts[BASE256_ENCODATION]
				&& intCharCounts[C40_ENCODATION] + 1 < intCharCounts[EDIFACT_ENCODATION]
				&& intCharCounts[C40_ENCODATION] + 1 < intCharCounts[TEXT_ENCODATION]) {
				if (intCharCounts[C40_ENCODATION] < intCharCounts[X12_ENCODATION]) {
					return C40_ENCODATION;
				}
				// Tie between C40 and X12: scan ahead; X12 wins if a terminator/separator
				// shows up before the first character that X12 cannot encode.
				if (intCharCounts[C40_ENCODATION] == intCharCounts[X12_ENCODATION]) {
					size_t p = startpos + charsProcessed + 1;
					while (p < msg.length()) {
						int tc = msg.at(p);
						if (IsX12TermSep(tc)) {
							return X12_ENCODATION;
						}
						if (!IsNativeX12(tc)) {
							break;
						}
						p++;
					}
					return C40_ENCODATION;
				}
			}
		}
	}
}

// Formats the low byte of c as "0x" followed by two lower-case hex digits.
// Used for the "Illegal character" error messages.
static std::string ToHexString(int c)
{
	static const char HEX_DIGITS[] = "0123456789abcdef";
	const char high = HEX_DIGITS[(c >> 4) & 0xf];
	const char low = HEX_DIGITS[c & 0xf];
	return std::string("0x") + high + low;
}

namespace ASCIIEncoder {
	/**
	* Determines the number of consecutive characters that are encodable using numeric compaction.
	*
	* @param msg      the message
	* @param startpos the start position within the message
	* @return the requested character count
	*/
	static int DetermineConsecutiveDigitCount(const std::string& msg, int startpos)
	{
		auto begin = msg.begin() + startpos;
		return narrow_cast<int>(std::find_if_not(begin, msg.end(), IsDigit) - begin);
	}

	// Packs two ASCII digits into a single codeword: the two-digit number they form plus 130.
	// Returns '?' if either argument is not a digit.
	static uint8_t EncodeASCIIDigits(int digit1, int digit2)
	{
		if (!IsDigit(digit1) || !IsDigit(digit2)) {
			return '?';
		}
		const int tens = digit1 - '0';
		const int ones = digit2 - '0';
		return static_cast<uint8_t>(tens * 10 + ones + 130);
	}

	// Performs one step of ASCII encodation at the current position. Exactly one of:
	//  - two digits are packed into one codeword (position advances by 2),
	//  - a latch codeword is written and a switch to another mode is requested
	//    (position unchanged; the new mode encodes the character), or
	//  - the current character is written as one codeword, preceded by UPPER_SHIFT
	//    for extended ASCII (position advances by 1).
	static void EncodeASCII(EncoderContext& context)
	{
		//step B
		const int digitRun = DetermineConsecutiveDigitCount(context.message(), context.currentPos());
		if (digitRun >= 2) {
			context.addCodeword(EncodeASCIIDigits(context.currentChar(), context.nextChar()));
			context.setCurrentPos(context.currentPos() + 2);
			return;
		}

		const int c = context.currentChar();
		const int newMode = LookAheadTest(context.message(), context.currentPos(), ASCII_ENCODATION);
		if (newMode != ASCII_ENCODATION) {
			// LATCHES is indexed by encodation mode
			context.addCodeword(LATCHES[newMode]);
			context.setNewEncoding(newMode);
			return;
		}

		if (IsExtendedASCII(c)) {
			context.addCodeword(UPPER_SHIFT);
			context.addCodeword(static_cast<uint8_t>(c - 128 + 1));
		}
		else {
			context.addCodeword(static_cast<uint8_t>(c + 1));
		}
		context.setCurrentPos(context.currentPos() + 1);
	}

} // ASCIIEncoder

namespace C40Encoder {

	// Appends the C40 value(s) for one input character to sb and returns how many values
	// were appended: 1 for space, digits and upper-case letters, 2 for characters reached
	// through a shift set, and 2 more (Shift 2 + Upper Shift) for characters above 127,
	// which are then encoded as (c - 128).
	static int EncodeChar(int c, std::string& sb)
	{
		// Appends a single basic-set value.
		const auto appendBasic = [&sb](int value) {
			sb.push_back(static_cast<char>(value));
			return 1;
		};
		// Appends a shift-set selector followed by the value inside that set.
		const auto appendShifted = [&sb](char shiftSet, int value) {
			sb.push_back(shiftSet);
			sb.push_back(static_cast<char>(value));
			return 2;
		};

		if (c == ' ') {
			return appendBasic(3);
		}
		if (c >= '0' && c <= '9') {
			return appendBasic(c - 48 + 4);
		}
		if (c >= 'A' && c <= 'Z') {
			return appendBasic(c - 65 + 14);
		}
		if (c >= '\0' && c <= '\x1f') {
			return appendShifted('\0', c); //Shift 1 Set
		}
		if (c <= '/') {
			return appendShifted('\1', c - 33); //Shift 2 Set
		}
		if (c <= '@') {
			return appendShifted('\1', c - 58 + 15); //Shift 2 Set
		}
		if (c <= '_') {
			return appendShifted('\1', c - 91 + 22); //Shift 2 Set
		}
		if (c <= '\x7f') {
			return appendShifted('\2', c - 96); //Shift 3 Set
		}

		sb.append("\1\x1e"); //Shift 2, Upper Shift
		return 2 + EncodeChar(static_cast<char>(c - 0x80), sb);
	}

	// Undoes the encoding of the most recent character: drops its lastCharSize values from
	// the end of buffer, moves the context back by one character and re-encodes the character
	// now at the current position into `removed`.
	// Returns the number of values that re-encoding produced.
	static int BacktrackOneCharacter(EncoderContext& context, std::string& buffer, std::string& removed, int lastCharSize,
									 std::function<int(int, std::string&)> encodeChar)
	{
		buffer.resize(buffer.size() - lastCharSize);
		context.setCurrentPos(context.currentPos() - 1);
		const int reencodedSize = encodeChar(context.currentChar(), removed);
		context.resetSymbolInfo(); //Deal with possible reduction in symbol size
		return reencodedSize;
	}

	// Packs the three values sb[startPos..startPos+2] into one 16-bit number
	// (1600*v1 + 40*v2 + v3 + 1) and writes it as two codewords, high byte first.
	// std::string::at throws std::out_of_range if fewer than three values are available.
	static void EncodeToCodewords(EncoderContext& context, const std::string& sb, int startPos) {
		const int first = sb.at(startPos);
		const int second = sb.at(startPos + 1);
		const int third = sb.at(startPos + 2);
		const int packed = 1600 * first + 40 * second + third + 1;
		context.addCodeword(narrow_cast<uint8_t>(packed / 256));
		context.addCodeword(narrow_cast<uint8_t>(packed % 256));
	}

	// Writes the first three values of buffer as two codewords and removes them from buffer.
	static void WriteNextTriplet(EncoderContext& context, std::string& buffer)
	{
		constexpr int TRIPLET_SIZE = 3;
		EncodeToCodewords(context, buffer, 0);
		buffer.erase(0, TRIPLET_SIZE);
	}

	/**
	* Handle "end of data" situations: writes the values still held in buffer and decides
	* whether an unlatch codeword is needed, then requests the switch back to ASCII.
	*
	* @param context the encoder context
	* @param buffer  the buffer with the remaining encoded characters
	* @throws std::logic_error if a single value is left over and the symbol does not have
	*         exactly one free codeword
	*/
	static void HandleEOD(EncoderContext& context, std::string& buffer)
	{
		const int completeTriplets = Size(buffer) / 3;
		const int rest = Size(buffer) % 3;

		// Codeword count once every complete triplet (two codewords each) has been written.
		const int curCodewordCount = context.codewordCount() + completeTriplets * 2;
		auto symbolInfo = context.updateSymbolInfo(curCodewordCount);
		const int available = symbolInfo->dataCapacity() - curCodewordCount;

		const auto writeCompleteTriplets = [&context, &buffer] {
			while (buffer.length() >= 3) {
				WriteNextTriplet(context, buffer);
			}
		};

		if (rest == 2) {
			// Two values left over: complete the triplet with a Shift 1 value.
			buffer.push_back('\0'); //Shift 1
			writeCompleteTriplets();
			if (context.hasMoreCharacters()) {
				context.addCodeword(C40_UNLATCH);
			}
		}
		else if (rest == 1 && available == 1) {
			writeCompleteTriplets();
			if (context.hasMoreCharacters()) {
				context.addCodeword(C40_UNLATCH);
			}
			// else no unlatch
			// The single left-over value is not written; step back one character so that it
			// is encoded again after the switch to ASCII requested below.
			context.setCurrentPos(context.currentPos() - 1);
		}
		else if (rest == 0) {
			writeCompleteTriplets();
			if (available > 0 || context.hasMoreCharacters()) {
				context.addCodeword(C40_UNLATCH);
			}
		}
		else {
			throw std::logic_error("Unexpected case. Please report!");
		}
		context.setNewEncoding(ASCII_ENCODATION);
	}

	// Shared driver for the C40-style encodations (it is also called by the Text encoder).
	// Encodes characters into a value buffer until the message ends or LookAheadTest asks
	// for a different mode, then lets HandleEOD write the buffered values.
	//
	// @param context      the encoder context
	// @param encodeChar   appends the value(s) for one character to the buffer and returns
	//                     how many values it appended
	// @param encodingMode the *_ENCODATION constant of the calling encoder, passed to
	//                     LookAheadTest as the current mode
	static void EncodeC40(EncoderContext& context, std::function<int (int, std::string&)> encodeChar, int encodingMode)
	{
		//step C
		std::string buffer;
		while (context.hasMoreCharacters()) {
			int c = context.currentChar();
			context.setCurrentPos(context.currentPos() + 1);
			int lastCharSize = encodeChar(c, buffer);
			// Two codewords will be written for every complete triplet in the buffer.
			int unwritten = narrow_cast<int>(buffer.length() / 3) * 2;
			int curCodewordCount = context.codewordCount() + unwritten;
			auto symbolInfo = context.updateSymbolInfo(curCodewordCount);
			int available = symbolInfo->dataCapacity() - curCodewordCount;

			if (!context.hasMoreCharacters()) {
				//Avoid having a single C40 value in the last triplet
				// Backtracked characters are re-encoded into `removed`, which is discarded;
				// only the returned value count and the rewound position are used.
				std::string removed;
				if ((buffer.length() % 3) == 2 && available != 2) {
					lastCharSize = BacktrackOneCharacter(context, buffer, removed, lastCharSize, encodeChar);
				}
				while ((buffer.length() % 3) == 1 && ((lastCharSize <= 3 && available != 1) || lastCharSize > 3)) {
					lastCharSize = BacktrackOneCharacter(context, buffer, removed, lastCharSize, encodeChar);
				}
				break;
			}

			// Only consider a mode switch on a triplet boundary.
			if ((buffer.length() % 3) == 0) {
				int newMode = LookAheadTest(context.message(), context.currentPos(), encodingMode);
				if (newMode != encodingMode) {
					// Return to ASCII encodation, which will actually handle latch to new mode
					context.setNewEncoding(ASCII_ENCODATION);
					break;
				}
			}
		}
		return HandleEOD(context, buffer);
	}

	// Encodes in C40 mode, using the C40 character mapping defined in this namespace.
	static void EncodeC40(EncoderContext& context)
	{
		EncodeC40(context, EncodeChar, C40_ENCODATION);
	}

} // C40Encoder

namespace DMTextEncoder {

	// Appends the Text-encodation value(s) for one input character to sb and returns how
	// many values were appended: 1 for space, digits and lower-case letters, 2 for
	// characters reached through a shift set (upper-case letters live in Shift 3), and
	// 2 more (Shift 2 + Upper Shift) for characters above 127, which are then encoded
	// as (c - 128).
	static int EncodeChar(int c, std::string& sb)
	{
		// Appends a single basic-set value.
		const auto appendBasic = [&sb](int value) {
			sb.push_back(static_cast<char>(value));
			return 1;
		};
		// Appends a shift-set selector followed by the value inside that set.
		const auto appendShifted = [&sb](char shiftSet, int value) {
			sb.push_back(shiftSet);
			sb.push_back(static_cast<char>(value));
			return 2;
		};

		if (c == ' ') {
			return appendBasic(3);
		}
		if (c >= '0' && c <= '9') {
			return appendBasic(c - 48 + 4);
		}
		if (c >= 'a' && c <= 'z') {
			return appendBasic(c - 97 + 14);
		}
		if (c >= '\0' && c <= '\x1f') {
			return appendShifted('\0', c); //Shift 1 Set
		}
		if (c <= '/') {
			return appendShifted('\1', c - 33); //Shift 2 Set
		}
		if (c <= '@') {
			return appendShifted('\1', c - 58 + 15); //Shift 2 Set
		}
		if (c >= '[' && c <= '_') {
			return appendShifted('\1', c - 91 + 22); //Shift 2 Set
		}
		if (c == '\x60') {
			return appendShifted('\2', c - 96); //Shift 3 Set
		}
		if (c <= 'Z') {
			return appendShifted('\2', c - 65 + 1); //Shift 3 Set
		}
		if (c <= '\x7f') {
			return appendShifted('\2', c - 123 + 27); //Shift 3 Set
		}

		sb.append("\1\x1e"); //Shift 2, Upper Shift
		return 2 + EncodeChar(c - 128, sb);
	}

	// Encodes in Text mode by running the shared C40 driver with the Text character
	// mapping defined in this namespace and TEXT_ENCODATION as the current mode.
	static void EncodeText(EncoderContext& context)
	{
		C40Encoder::EncodeC40(context, EncodeChar, TEXT_ENCODATION);
	}

} // DMTextEncoder

namespace X12Encoder {

	// Appends the X12 value of c to sb and returns 1 (every X12 character is one value).
	// Throws std::invalid_argument for characters outside the X12 set
	// (CR, '*', '>', space, digits, upper-case letters).
	static int EncodeChar(int c, std::string& sb)
	{
		char value;
		if (c == '\r') {
			value = '\0';
		} else if (c == '*') {
			value = '\1';
		} else if (c == '>') {
			value = '\2';
		} else if (c == ' ') {
			value = '\3';
		} else if (c >= '0' && c <= '9') {
			value = static_cast<char>(c - 48 + 4);
		} else if (c >= 'A' && c <= 'Z') {
			value = static_cast<char>(c - 65 + 14);
		} else {
			throw std::invalid_argument("Illegal character: " + ToHexString(c));
		}
		sb.push_back(value);
		return 1;
	}

	// Finishes an X12 run: hands the characters whose values are still buffered (an
	// incomplete triplet) back to the context, writes the unlatch codeword unless the
	// remaining characters and the free capacity are equal and at most one, and requests
	// ASCII mode if no other mode change is pending.
	static void HandleEOD(EncoderContext& context, std::string& buffer)
	{
		const int written = context.codewordCount();
		const int available = context.updateSymbolInfo(written)->dataCapacity() - written;

		// Buffered values were never written as codewords: rewind over their characters.
		context.setCurrentPos(context.currentPos() - Size(buffer));

		const int remaining = context.remainingCharacters();
		const bool unlatchCanBeSkipped = remaining <= 1 && available <= 1 && remaining == available;
		if (!unlatchCanBeSkipped) {
			context.addCodeword(X12_UNLATCH);
		}
		if (context.newEncoding() < 0) {
			context.setNewEncoding(ASCII_ENCODATION);
		}
	}

	// Encodes in X12 mode: every three characters are written as two codewords. After each
	// written triplet LookAheadTest decides whether to stay in X12. HandleEOD deals with
	// whatever is left in the buffer when the loop ends.
	static void EncodeX12(EncoderContext& context)
	{
		//step C
		std::string pending;
		while (context.hasMoreCharacters()) {
			const int c = context.currentChar();
			context.setCurrentPos(context.currentPos() + 1);
			EncodeChar(c, pending);
			if (pending.length() % 3 != 0) {
				continue;
			}

			C40Encoder::WriteNextTriplet(context, pending);

			if (LookAheadTest(context.message(), context.currentPos(), X12_ENCODATION) != X12_ENCODATION) {
				// Return to ASCII encodation, which will actually handle latch to new mode
				context.setNewEncoding(ASCII_ENCODATION);
				break;
			}
		}
		HandleEOD(context, pending);
	}
	
} // X12Encoder

namespace EdifactEncoder {

	// Appends the EDIFACT value of c to sb: ' '..'?' keep their code, '@'..'^' map to
	// (c - 64). Throws std::invalid_argument for any other character.
	static void EncodeChar(int c, std::string& sb)
	{
		if (c < ' ' || c > '^') {
			throw std::invalid_argument("Illegal character: " + ToHexString(c));
		}
		const int value = (c >= '@') ? c - 64 : c;
		sb.push_back(static_cast<char>(value));
	}

	// Packs up to four 6-bit values, taken from sb starting at startPos, into up to three
	// bytes. Missing trailing values count as zero; one, two or three bytes are returned
	// when one, two or at least three values are available.
	// Throws std::invalid_argument if no value is available.
	static ByteArray EncodeToCodewords(const std::string& sb, int startPos)
	{
		const int len = Size(sb) - startPos;
		if (len == 0) {
			throw std::invalid_argument("buffer must not be empty");
		}
		const int c1 = sb.at(startPos);
		// Value at the given offset, or zero when the buffer holds fewer values.
		const auto valueAt = [&sb, startPos, len](int offset) -> int {
			return len > offset ? sb.at(startPos + offset) : 0;
		};
		const int c2 = valueAt(1);
		const int c3 = valueAt(2);
		const int c4 = valueAt(3);

		const int packed = (c1 << 18) + (c2 << 12) + (c3 << 6) + c4;
		const uint8_t bytes[3] = {
			static_cast<uint8_t>((packed >> 16) & 255),
			static_cast<uint8_t>((packed >> 8) & 255),
			static_cast<uint8_t>(packed & 255),
		};

		ByteArray res;
		res.reserve(3);
		const int byteCount = std::min(len, 3);
		for (int i = 0; i < byteCount; ++i) {
			res.push_back(bytes[i]);
		}
		return res;
	}

	/**
	* Handle "end of data" situations: writes (or hands back to ASCII encodation) the values
	* left in the buffer, which always ends with the unlatch value appended by the caller.
	*
	* On every way out of this function - normal end, early return or exception - the
	* context is told to continue in ASCII encodation.
	*
	* @param context the encoder context
	* @param buffer  the buffer with the remaining encoded characters
	* @throws std::invalid_argument if the buffer holds more than four values
	*/
	static void HandleEOD(EncoderContext& context, std::string& buffer)
	{
		// Scope guard playing the role of a `finally` block. The previous try/catch form
		// performed the switch to ASCII after an exception and at the end of the function,
		// but skipped it on the two early returns below.
		struct AsciiModeOnExit
		{
			EncoderContext& ctx;
			~AsciiModeOnExit() { ctx.setNewEncoding(ASCII_ENCODATION); }
		} asciiModeOnExit{context};

		size_t count = buffer.length();
		if (count == 0) {
			return; //Already finished
		}
		if (count == 1) {
			//Only an unlatch at the end
			int codewordCount = context.codewordCount();
			auto symbolInfo = context.updateSymbolInfo(codewordCount);
			int available = symbolInfo->dataCapacity() - codewordCount;
			int remaining = context.remainingCharacters();
			// The following two lines are a hack inspired by the 'fix' from https://sourceforge.net/p/barcode4j/svn/221/
			if (remaining > available)
				available = context.updateSymbolInfo(codewordCount+1)->dataCapacity() - codewordCount;
			if (remaining <= available && available <= 2) {
				return; //No unlatch
			}
		}

		if (count > 4) {
			throw std::invalid_argument("Count must not exceed 4");
		}
		// Number of real values in the buffer, i.e. without the trailing unlatch.
		int restChars = static_cast<int>(count - 1);
		auto encoded = EncodeToCodewords(buffer, 0);
		bool endOfSymbolReached = !context.hasMoreCharacters();
		// At the end of the message, one or two left-over characters may be re-encoded in ASCII instead.
		bool restInAscii = endOfSymbolReached && restChars <= 2;

		if (restChars <= 2) {
			int codewordCount = context.codewordCount();
			auto symbolInfo = context.updateSymbolInfo(codewordCount + restChars);
			int available = symbolInfo->dataCapacity() - codewordCount;
			if (available >= 3) {
				// Enough room for the EDIFACT codewords: keep them.
				restInAscii = false;
				context.updateSymbolInfo(codewordCount + Size(encoded));
			}
		}

		if (restInAscii) {
			// Rewind over the left-over characters so that they are encoded again in ASCII.
			context.resetSymbolInfo();
			context.setCurrentPos(context.currentPos() - restChars);
		}
		else {
			for (uint8_t cw : encoded) {
				context.addCodeword(cw);
			}
		}
	}

	// Encodes in EDIFACT mode: every four characters are written as three codewords. After
	// each written group LookAheadTest decides whether to stay in EDIFACT. When the loop
	// ends, the unlatch value (31) is appended and HandleEOD deals with the rest.
	static void EncodeEdifact(EncoderContext& context)
	{
		//step F
		std::string pending;
		while (context.hasMoreCharacters()) {
			EncodeChar(context.currentChar(), pending);
			context.setCurrentPos(context.currentPos() + 1);
			if (pending.length() < 4) {
				continue;
			}

			for (uint8_t cw : EncodeToCodewords(pending, 0)) {
				context.addCodeword(cw);
			}
			pending.erase(0, 4);

			if (LookAheadTest(context.message(), context.currentPos(), EDIFACT_ENCODATION) != EDIFACT_ENCODATION) {
				// Return to ASCII encodation, which will actually handle latch to new mode
				context.setNewEncoding(ASCII_ENCODATION);
				break;
			}
		}
		pending.push_back(31); //Unlatch
		HandleEOD(context, pending);
	}

} // EdifactEncoder

namespace Base256Encoder {

	// Adds a position-dependent pseudo-random offset (1..255) to ch and wraps sums above
	// 255 back by 256. EncodeBase256 applies this to every Base 256 codeword.
	static int Randomize255State(int ch, int codewordPosition)
	{
		const int pseudoRandom = (149 * codewordPosition) % 255 + 1;
		const int sum = ch + pseudoRandom;
		return sum > 255 ? sum - 256 : sum;
	}

	// Encodes in Base 256 mode: collects bytes until the message ends or LookAheadTest asks
	// for another mode, fills in the length field and writes everything through
	// Randomize255State.
	// Throws std::invalid_argument if more than 1555 bytes were collected and a length
	// field has to be written.
	static void EncodeBase256(EncoderContext& context)
	{
		std::string buffer(1, '\0'); //Initialize length field
		while (context.hasMoreCharacters()) {
			buffer.push_back(context.currentChar());
			context.setCurrentPos(context.currentPos() + 1);

			if (LookAheadTest(context.message(), context.currentPos(), BASE256_ENCODATION) != BASE256_ENCODATION) {
				// Return to ASCII encodation, which will actually handle latch to new mode
				context.setNewEncoding(ASCII_ENCODATION);
				break;
			}
		}

		const int lengthFieldSize = 1;
		const int dataCount = Size(buffer) - lengthFieldSize;
		const int currentSize = context.codewordCount() + dataCount + lengthFieldSize;
		auto symbolInfo = context.updateSymbolInfo(currentSize);
		const bool mustPad = symbolInfo->dataCapacity() > currentSize;

		// The length field stays 0 only when the data runs to the very end of the symbol.
		if (context.hasMoreCharacters() || mustPad) {
			if (dataCount > 1555) {
				throw std::invalid_argument("Message length not in valid ranges: " + std::to_string(dataCount));
			}
			if (dataCount <= 249) {
				// One-byte length field
				buffer.at(0) = static_cast<char>(dataCount);
			}
			else {
				// Two-byte length field
				buffer.at(0) = static_cast<char>((dataCount / 250) + 249);
				buffer.insert(1, 1, static_cast<char>(dataCount % 250));
			}
		}

		for (char c : buffer) {
			context.addCodeword(Randomize255State(c, context.codewordCount() + 1));
		}
	}

} // Base256Encoder

//TODO: c++20 - replace with std::wstring_view::starts_with
// Returns true when s begins with ss. A string equal to ss counts as starting with it,
// and every string starts with the empty string.
static bool StartsWith(std::wstring_view s, std::wstring_view ss)
{
	// ">=" (not ">"): the prefix may be as long as the whole string.
	return s.length() >= ss.length() && s.compare(0, ss.length(), ss) == 0;
}

// Returns true when s ends with ss. A string equal to ss counts as ending with it,
// and every string ends with the empty string.
static bool EndsWith(std::wstring_view s, std::wstring_view ss)
{
	// ">=" (not ">"): the suffix may be as long as the whole string. The length check also
	// keeps the subtraction below from wrapping around.
	return s.length() >= ss.length() && s.compare(s.length() - ss.length(), ss.length(), ss) == 0;
}

// Convenience overload: encodes msg with the ISO8859_1 character set, SymbolShape::NONE
// and -1 for all four size constraints.
ByteArray Encode(const std::wstring& msg)
{
	return Encode(msg, CharacterSet::ISO8859_1, SymbolShape::NONE, -1, -1, -1, -1);
}

/**
* Performs message encoding of a DataMatrix message using the algorithm described in annex P
* of ISO/IEC 16022:2000(E).
*
* @param msg       the message
* @param charset   character set used to convert msg to bytes; CharacterSet::Unknown is
*                  treated as ISO8859_1
* @param shape     requested symbol shape, forwarded to the encoder context
* @param minWidth  minimum symbol width constraint, forwarded to the encoder context
* @param minHeight minimum symbol height constraint, forwarded to the encoder context
* @param maxWidth  maximum symbol width constraint, forwarded to the encoder context
* @param maxHeight maximum symbol height constraint, forwarded to the encoder context
*                  (the single-argument overload passes -1 for all four constraints)
* @return the encoded codewords, padded up to the data capacity of the selected symbol
*/
ByteArray Encode(const std::wstring& msg, CharacterSet charset, SymbolShape shape, int minWidth, int minHeight, int maxWidth, int maxHeight)
{
	if (charset == CharacterSet::Unknown) {
		charset = CharacterSet::ISO8859_1;
	}

	EncoderContext context(TextEncoder::FromUnicode(msg, charset));
	context.setSymbolShape(shape);
	context.setSizeConstraints(minWidth, minHeight, maxWidth, maxHeight);

	constexpr std::wstring_view MACRO_05_HEADER = L"[)>\x1E""05\x1D";
	constexpr std::wstring_view MACRO_06_HEADER = L"[)>\x1E""06\x1D";
	constexpr std::wstring_view MACRO_TRAILER = L"\x1E\x04";

	// A message wrapped in a macro header and trailer is encoded with a single macro
	// codeword: encoding starts after the header and the two trailer characters are skipped.
	if (StartsWith(msg, MACRO_05_HEADER) && EndsWith(msg, MACRO_TRAILER)) {
		context.addCodeword(MACRO_05);
		context.setSkipAtEnd(2);
		context.setCurrentPos(Size(MACRO_05_HEADER));
	}
	else if (StartsWith(msg, MACRO_06_HEADER) && EndsWith(msg, MACRO_TRAILER)) {
		context.addCodeword(MACRO_06);
		context.setSkipAtEnd(2);
		context.setCurrentPos(Size(MACRO_06_HEADER));
	}

	// Run the encoder of the current mode until the message is consumed. Each encoder
	// returns when it is done or wants a mode change, which it signals through the context.
	int encodingMode = ASCII_ENCODATION; //Default mode
	while (context.hasMoreCharacters()) {
		switch (encodingMode) {
		case ASCII_ENCODATION:   ASCIIEncoder::EncodeASCII(context);     break;
		case C40_ENCODATION:     C40Encoder::EncodeC40(context);         break;
		case TEXT_ENCODATION:    DMTextEncoder::EncodeText(context);     break;
		case X12_ENCODATION:     X12Encoder::EncodeX12(context);         break;
		case EDIFACT_ENCODATION: EdifactEncoder::EncodeEdifact(context); break;
		case BASE256_ENCODATION: Base256Encoder::EncodeBase256(context); break;
		}
		if (context.newEncoding() >= 0) {
			encodingMode = context.newEncoding();
			context.clearNewEncoding();
		}
	}
	int len = context.codewordCount();
	auto symbolInfo = context.updateSymbolInfo(len);
	int capacity = symbolInfo->dataCapacity();
	if (len < capacity) {
		if (encodingMode != ASCII_ENCODATION && encodingMode != BASE256_ENCODATION && encodingMode != EDIFACT_ENCODATION) {
			// Still in C40, Text or X12: these share the unlatch value 254
			// (C40_UNLATCH == X12_UNLATCH).
			context.addCodeword(C40_UNLATCH);
		}
	}
	//Padding: one plain PAD codeword, then randomized PAD codewords up to the capacity
	if (context.codewordCount() < capacity) {
		context.addCodeword(PAD);
	}
	while (context.codewordCount() < capacity) {
		context.addCodeword(Randomize253State(PAD, context.codewordCount() + 1));
	}

	return context.codewords();
}

} // namespace ZXing::DataMatrix