ANSLibs/QRCode/qrcode/QREncoder.cpp

/*
* Copyright 2016 Huy Cuong Nguyen
* Copyright 2016 ZXing authors
*/
// SPDX-License-Identifier: Apache-2.0

#include "QREncoder.h"

#include "BitArray.h"
#include "ECI.h"
#include "GenericGF.h"
#include "QREncodeResult.h"
#include "QRErrorCorrectionLevel.h"
#include "QRMaskUtil.h"
#include "QRMatrixUtil.h"
#include "ReedSolomonEncoder.h"
#include "TextEncoder.h"
#include "ZXTestSupport.h"

#include <algorithm>
#include <array>
#include <limits>
#include <stdexcept>

namespace ZXing::QRCode {

// Character set assumed by Encode() when the caller passes CharacterSet::Unknown.
static const CharacterSet DEFAULT_BYTE_MODE_ENCODING = CharacterSet::ISO8859_1;

// The original table is defined in the table 5 of JISX0510:2004 (p.19).
// The index is the character code (0x00-0x5f, 16 entries per row); the value is the code
// used in alphanumeric mode, or -1 if the character has no alphanumeric representation.
static const std::array<int, 16 * 6> ALPHANUMERIC_TABLE = {
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0x00-0x0f
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,  // 0x10-0x1f
	36, -1, -1, -1, 37, 38, -1, -1, -1, -1, 39, 40, -1, 41, 42, 43,  // 0x20-0x2f
	0,   1,  2,  3,  4,  5,  6,  7,  8,  9, 44, -1, -1, -1, -1, -1,  // 0x30-0x3f
	-1, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,  // 0x40-0x4f
	25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1,  // 0x50-0x5f
};

/**
* Converts "content" to Shift_JIS and reports whether the result has an even number of bytes
* and every byte at an even offset lies in 0x81-0x9F or 0xE0-0xEB.
*/
static bool IsOnlyDoubleByteKanji(const std::wstring& content)
{
	const std::string sjis = TextEncoder::FromUnicode(content, CharacterSet::Shift_JIS);
	const size_t byteCount = sjis.length();
	if (byteCount % 2 != 0) {
		return false;
	}
	// Only the first byte of each two-byte pair is inspected.
	for (size_t pos = 0; pos < byteCount; pos += 2) {
		const int lead = sjis[pos] & 0xff;
		const bool inLowerRange = lead >= 0x81 && lead <= 0x9F;
		const bool inUpperRange = lead >= 0xE0 && lead <= 0xEB;
		if (!inLowerRange && !inUpperRange) {
			return false;
		}
	}
	return true;
}

/**
* @param code character code to look up; any value is accepted.
* @return the code point of the table used in alphanumeric mode or
*  -1 if there is no corresponding code in the table (this includes
*  negative values and values past the end of the table).
*/
ZXING_EXPORT_TEST_ONLY
int GetAlphanumericCode(int code)
{
	// The lower bound matters: callers pass wchar_t values, which may be negative on
	// platforms where wchar_t is signed, and would otherwise index before the table start.
	if (code >= 0 && code < Size(ALPHANUMERIC_TABLE)) {
		return ALPHANUMERIC_TABLE[code];
	}
	return -1;
}

/**
* Choose the best mode by examining the content. Note that 'encoding' is used as a hint;
* if it is Shift_JIS, and the input is only double-byte Kanji, then we return CodecMode::KANJI.
* Otherwise: digits only -> NUMERIC, alphanumeric-table characters only -> ALPHANUMERIC,
* anything else (including empty content) -> BYTE.
*/
ZXING_EXPORT_TEST_ONLY
CodecMode ChooseMode(const std::wstring& content, CharacterSet encoding)
{
	// Kanji mode is only picked when every input character is a double-byte one.
	if (encoding == CharacterSet::Shift_JIS && IsOnlyDoubleByteKanji(content)) {
		return CodecMode::KANJI;
	}

	bool sawDigit = false;
	bool sawOtherAlphanumeric = false;
	for (wchar_t ch : content) {
		const bool isDigit = ch >= '0' && ch <= '9';
		if (isDigit) {
			sawDigit = true;
			continue;
		}
		if (GetAlphanumericCode(ch) == -1) {
			// A single character outside the alphanumeric table forces byte mode.
			return CodecMode::BYTE;
		}
		sawOtherAlphanumeric = true;
	}

	if (sawOtherAlphanumeric) {
		return CodecMode::ALPHANUMERIC;
	}
	return sawDigit ? CodecMode::NUMERIC : CodecMode::BYTE;
}

/*
* Append an ECI segment (ECI mode indicator followed by the ECI designator) to "bits".
* Nothing is appended when the ECI value is outside 0..999999.
* See ISO/IEC 18004:2015 Table 4
*/
static void AppendECI(CharacterSet eci, BitArray& bits)
{
	const int designator = ToInt(ToECI(eci));
	if (designator < 0 || designator > 999999) {
		return;
	}

	bits.appendBits(static_cast<int>(CodecMode::ECI), 4);
	// The designator takes 8, 16 or 24 bits depending on its magnitude; the leading
	// bits (0, 10 or 110) tell which form is used.
	if (designator <= 127) {
		bits.appendBits(designator, 8);
	}
	else if (designator <= 16383) {
		bits.appendBits(0x8000 | designator, 16);
	}
	else {
		bits.appendBits(0xC00000 | designator, 24);
	}
}

/**
* Append the 4-bit mode indicator of "mode" to "bits".
*/
ZXING_EXPORT_TEST_ONLY
void AppendModeInfo(CodecMode mode, BitArray& bits)
{
	const int modeIndicator = static_cast<int>(mode);
	bits.appendBits(modeIndicator, 4);
}


/**
* Append the character count "numLetters" to "bits", using the field width that
* CharacterCountBits() reports for "mode" and "version".
* @throws std::invalid_argument if "numLetters" does not fit in that field.
*/
ZXING_EXPORT_TEST_ONLY
void AppendLengthInfo(int numLetters, const Version& version, CodecMode mode, BitArray& bits)
{
	const int fieldWidth = CharacterCountBits(mode, version);
	const int limit = 1 << fieldWidth;  // first value that no longer fits
	if (numLetters >= limit) {
		throw std::invalid_argument(std::to_string(numLetters) + " is bigger than " + std::to_string(limit - 1));
	}
	bits.appendBits(numLetters, fieldWidth);
}

/**
* Append "content" to "bits" in numeric mode: groups of three digits take ten bits,
* a trailing pair takes seven bits and a trailing single digit takes four bits.
* The characters are not validated here; each is converted by subtracting '0'.
*/
ZXING_EXPORT_TEST_ONLY
void AppendNumericBytes(const std::wstring& content, BitArray& bits)
{
	const size_t count = content.length();
	auto digitAt = [&content](size_t idx) -> int { return content[idx] - '0'; };

	for (size_t pos = 0; pos < count;) {
		const size_t remaining = count - pos;
		if (remaining >= 3) {
			// Three digits -> ten bits.
			bits.appendBits(digitAt(pos) * 100 + digitAt(pos + 1) * 10 + digitAt(pos + 2), 10);
			pos += 3;
		}
		else if (remaining == 2) {
			// Two digits -> seven bits.
			bits.appendBits(digitAt(pos) * 10 + digitAt(pos + 1), 7);
			pos += 2;
		}
		else {
			// One digit -> four bits.
			bits.appendBits(digitAt(pos), 4);
			pos += 1;
		}
	}
}

/**
* Append "content" to "bits" in alphanumeric mode: pairs of characters take 11 bits,
* a trailing single character takes six bits.
* @throws std::invalid_argument if a character has no alphanumeric code.
*/
ZXING_EXPORT_TEST_ONLY
void AppendAlphanumericBytes(const std::wstring& content, BitArray& bits)
{
	// Looks up the alphanumeric code of one character, rejecting unencodable ones.
	auto codeAt = [&content](size_t idx) {
		const int code = GetAlphanumericCode(content[idx]);
		if (code == -1) {
			throw std::invalid_argument("Unexpected contents");
		}
		return code;
	};

	const size_t count = content.length();
	size_t pos = 0;
	// Full pairs first.
	for (; pos + 1 < count; pos += 2) {
		const int first = codeAt(pos);
		const int second = codeAt(pos + 1);
		bits.appendBits(first * 45 + second, 11);
	}
	// Then the odd character left over, if any.
	if (pos < count) {
		bits.appendBits(codeAt(pos), 6);
	}
}

/**
* Append "content", converted to "encoding" by TextEncoder::FromUnicode, to "bits"
* as a sequence of 8-bit values.
*/
ZXING_EXPORT_TEST_ONLY
void Append8BitBytes(const std::wstring& content, CharacterSet encoding, BitArray& bits)
{
	for (char b : TextEncoder::FromUnicode(content, encoding)) {
		// Mask to 0..255 (as done for the Shift_JIS bytes elsewhere in this file): where plain
		// char is signed, bytes >= 0x80 would otherwise be handed over as negative ints.
		bits.appendBits(b & 0xff, 8);
	}
}

/**
* Append "content" to "bits" in Kanji mode: the text is converted to Shift_JIS and every
* two-byte code is packed into 13 bits.
* @throws std::invalid_argument if the Shift_JIS byte count is odd or a two-byte code is
*  outside 0x8140-0x9ffc and 0xe040-0xebbf.
*/
ZXING_EXPORT_TEST_ONLY
void AppendKanjiBytes(const std::wstring& content, BitArray& bits)
{
	const std::string sjis = TextEncoder::FromUnicode(content, CharacterSet::Shift_JIS);
	const int byteCount = Size(sjis);
	if (byteCount % 2 != 0) {
		throw std::invalid_argument("Kanji byte size not even");
	}

	for (int pos = 0; pos + 1 < byteCount; pos += 2) {
		const int hi = sjis[pos] & 0xff;
		const int lo = sjis[pos + 1] & 0xff;
		const int code = (hi << 8) | lo;

		// Each valid range has its own base that is subtracted before packing.
		int base = 0;
		if (code >= 0x8140 && code <= 0x9ffc) {
			base = 0x8140;
		}
		else if (code >= 0xe040 && code <= 0xebbf) {
			base = 0xc140;
		}
		else {
			throw std::invalid_argument("Invalid byte sequence");
		}

		const int reduced = code - base;
		const int packed = ((reduced >> 8) * 0xc0) + (reduced & 0xff);
		bits.appendBits(packed, 13);
	}
}

/**
* Append "content" to "bits" using the encoder that belongs to "mode". "encoding" is only
* used in BYTE mode.
* @throws std::invalid_argument if "mode" is not NUMERIC, ALPHANUMERIC, BYTE or KANJI.
*/
ZXING_EXPORT_TEST_ONLY
void AppendBytes(const std::wstring& content, CodecMode mode, CharacterSet encoding, BitArray& bits)
{
	if (mode == CodecMode::NUMERIC) {
		AppendNumericBytes(content, bits);
		return;
	}
	if (mode == CodecMode::ALPHANUMERIC) {
		AppendAlphanumericBytes(content, bits);
		return;
	}
	if (mode == CodecMode::BYTE) {
		Append8BitBytes(content, encoding, bits);
		return;
	}
	if (mode == CodecMode::KANJI) {
		AppendKanjiBytes(content, bits);
		return;
	}
	throw std::invalid_argument("Invalid mode: " + std::to_string(static_cast<int>(mode)));
}

/**
* @return true if the number of input bits will fit in a code with the specified version and
* error correction level.
*/
static bool WillFit(int numInputBits, const Version& version, ErrorCorrectionLevel ecLevel) {
	// Example with Version 7-H: 196 total codewords, 130 of them error correction,
	// which leaves 196 - 130 = 66 codewords for data.
	const int dataCapacityBytes = version.totalCodewords() - version.ecBlocksForLevel(ecLevel).totalCodewords();
	// Round the bit count up to whole bytes.
	const int requiredBytes = (numInputBits + 7) / 8;
	return requiredBytes <= dataCapacityBytes;
}

/**
* @return the lowest Model 2 version (1 to 40) whose data capacity at "ecLevel" holds "numInputBits".
* @throws std::invalid_argument if not even version 40 is big enough.
*/
static const Version& ChooseVersion(int numInputBits, ErrorCorrectionLevel ecLevel)
{
	int versionNum = 1;
	while (versionNum <= 40) {
		const Version& candidate = *Version::Model2(versionNum);
		if (WillFit(numInputBits, candidate, ecLevel)) {
			return candidate;
		}
		++versionNum;
	}
	throw std::invalid_argument("Data too big");
}

/**
* Terminate bits as described in 8.4.8 and 8.4.9 of JISX0510:2004 (p.24).
* On return "bits" holds exactly "numDataBytes" * 8 bits.
* @throws std::invalid_argument if "bits" is already larger than that capacity, or if the
*  final size does not match it.
*/
ZXING_EXPORT_TEST_ONLY
void TerminateBits(int numDataBytes, BitArray& bits)
{
	const int capacity = numDataBytes * 8;
	if (bits.size() > capacity) {
		// Separator added so the bit count is not glued to the preceding text.
		throw std::invalid_argument("data bits cannot fit in the QR Code: " + std::to_string(bits.size()) + " > "
									+ std::to_string(capacity));
	}
	// Append termination bits (at most four zero bits, fewer if the capacity is reached first).
	// See 8.4.8 of JISX0510:2004 (p.24) for details.
	for (int i = 0; i < 4 && bits.size() < capacity; ++i) {
		bits.appendBit(false);
	}
	// If the last byte isn't 8-bit aligned, we'll add padding bits.
	const int numBitsInLastByte = bits.size() & 0x07;
	if (numBitsInLastByte > 0) {
		for (int i = numBitsInLastByte; i < 8; i++) {
			bits.appendBit(false);
		}
	}
	// If we have more space, we'll fill the space with padding patterns defined in 8.4.9 (p.24):
	// 0xEC and 0x11, alternating.
	const int numPaddingBytes = numDataBytes - bits.sizeInBytes();
	for (int i = 0; i < numPaddingBytes; ++i) {
		bits.appendBits((i & 0x01) == 0 ? 0xEC : 0x11, 8);
	}
	if (bits.size() != capacity) {
		throw std::invalid_argument("Bits size does not equal capacity");
	}
}

// One Reed-Solomon block: a slice of the data bytes together with the error correction
// bytes generated for that slice (filled in by InterleaveWithECBytes).
struct BlockPair
{
	ByteArray dataBytes; // data bytes of this block
	ByteArray ecBytes;   // error correction bytes computed from dataBytes
};


/**
* Get number of data bytes and number of error correction bytes for block id "blockID". Store
* the result in "numDataBytesInBlock", and "numECBytesInBlock". See table 12 in 8.5.1 of
* JISX0510:2004 (p.30)
*
* Blocks are split into two groups: the first (numRSBlocks - numTotalBytes % numRSBlocks) blocks
* form group 1, the remaining ones form group 2 and carry one data byte more.
*
* @throws std::invalid_argument if "blockID" is not in [0, numRSBlocks) or if the given totals
*  are not consistent with each other.
*/
ZXING_EXPORT_TEST_ONLY
void GetNumDataBytesAndNumECBytesForBlockID(int numTotalBytes, int numDataBytes, int numRSBlocks, int blockID,
											int& numDataBytesInBlock, int& numECBytesInBlock)
{
	if (blockID >= numRSBlocks) {
		throw std::invalid_argument("Block ID too large");
	}
	// Together with the check above this guarantees numRSBlocks >= 1, so the divisions
	// below cannot divide by zero, and a negative ID is no longer silently treated as group 1.
	if (blockID < 0) {
		throw std::invalid_argument("Block ID must not be negative");
	}
	// The numbers in the following comments are those of Version 7-H.
	// numRsBlocksInGroup2 = 196 % 5 = 1
	int numRsBlocksInGroup2 = numTotalBytes % numRSBlocks;
	// numRsBlocksInGroup1 = 5 - 1 = 4
	int numRsBlocksInGroup1 = numRSBlocks - numRsBlocksInGroup2;
	// numTotalBytesInGroup1 = 196 / 5 = 39
	int numTotalBytesInGroup1 = numTotalBytes / numRSBlocks;
	// numTotalBytesInGroup2 = 39 + 1 = 40
	int numTotalBytesInGroup2 = numTotalBytesInGroup1 + 1;
	// numDataBytesInGroup1 = 66 / 5 = 13
	int numDataBytesInGroup1 = numDataBytes / numRSBlocks;
	// numDataBytesInGroup2 = 13 + 1 = 14
	int numDataBytesInGroup2 = numDataBytesInGroup1 + 1;
	// numEcBytesInGroup1 = 39 - 13 = 26
	int numEcBytesInGroup1 = numTotalBytesInGroup1 - numDataBytesInGroup1;
	// numEcBytesInGroup2 = 40 - 14 = 26
	int numEcBytesInGroup2 = numTotalBytesInGroup2 - numDataBytesInGroup2;
	// Sanity checks.
	// 26 = 26
	if (numEcBytesInGroup1 != numEcBytesInGroup2) {
		throw std::invalid_argument("EC bytes mismatch");
	}
	// 5 = 4 + 1.
	if (numRSBlocks != numRsBlocksInGroup1 + numRsBlocksInGroup2) {
		throw std::invalid_argument("RS blocks mismatch");
	}
	// 196 = (13 + 26) * 4 + (14 + 26) * 1
	if (numTotalBytes
		!= ((numDataBytesInGroup1 + numEcBytesInGroup1) * numRsBlocksInGroup1)
			   + ((numDataBytesInGroup2 + numEcBytesInGroup2) * numRsBlocksInGroup2)) {
		throw std::invalid_argument("Total bytes mismatch");
	}

	if (blockID < numRsBlocksInGroup1) {
		numDataBytesInBlock = numDataBytesInGroup1;
		numECBytesInBlock = numEcBytesInGroup1;
	}
	else {
		numDataBytesInBlock = numDataBytesInGroup2;
		numECBytesInBlock = numEcBytesInGroup2;
	}
}

/**
* Compute "numEcBytes" error correction bytes for "dataBytes" with ReedSolomonEncode over
* GenericGF::QRCodeField256() and store them in "ecBytes" (which is resized accordingly).
*/
ZXING_EXPORT_TEST_ONLY
void GenerateECBytes(const ByteArray& dataBytes, int numEcBytes, ByteArray& ecBytes)
{
	// Working buffer: the data bytes followed by numEcBytes zero-initialized entries.
	std::vector<int> codewords(dataBytes.size() + numEcBytes, 0);
	std::copy(dataBytes.begin(), dataBytes.end(), codewords.begin());
	ReedSolomonEncode(GenericGF::QRCodeField256(), codewords, numEcBytes);

	// The trailing numEcBytes entries of the buffer are taken as the error correction bytes.
	ecBytes.resize(numEcBytes);
	auto out = ecBytes.begin();
	for (auto it = codewords.end() - numEcBytes; it != codewords.end(); ++it, ++out) {
		*out = narrow_cast<uint8_t>(*it);
	}
}


/**
* Interleave "bits" with corresponding error correction bytes and return the result.
* The interleave rule is complicated. See 8.6 of JISX0510:2004 (p.37) for details.
* @throws std::invalid_argument if the sizes passed in are not consistent with "bits" or with
*  each other.
*/
ZXING_EXPORT_TEST_ONLY
BitArray InterleaveWithECBytes(const BitArray& bits, int numTotalBytes, int numDataBytes, int numRSBlocks)
{
	// "bits" must hold exactly "numDataBytes" bytes of data.
	if (bits.sizeInBytes() != numDataBytes) {
		throw std::invalid_argument("Number of bits and data bytes does not match");
	}

	// Step 1. Cut the data bytes into Reed-Solomon blocks and generate the error correction
	// bytes of each block. The block count is known up front, so the vector is sized once.
	std::vector<BlockPair> blocks(numRSBlocks);
	int consumedDataBytes = 0;
	int longestData = 0;
	int longestEc = 0;

	int blockID = 0;
	for (BlockPair& block : blocks) {
		int dataLen = 0;
		int ecLen = 0;
		GetNumDataBytesAndNumECBytesForBlockID(numTotalBytes, numDataBytes, numRSBlocks, blockID, dataLen, ecLen);

		block.dataBytes = bits.toBytes(8 * consumedDataBytes, dataLen);
		GenerateECBytes(block.dataBytes, ecLen, block.ecBytes);

		longestData = std::max(longestData, dataLen);
		longestEc = std::max(longestEc, Size(block.ecBytes));
		consumedDataBytes += dataLen;
		++blockID;
	}
	if (numDataBytes != consumedDataBytes) {
		throw std::invalid_argument("Data bytes does not match offset");
	}

	// Step 2. Emit byte 0 of every block, then byte 1 of every block, and so on; blocks that
	// are shorter than the current position are skipped.
	BitArray output;
	auto appendInterleaved = [&blocks, &output](int length, ByteArray BlockPair::*member) {
		for (int pos = 0; pos < length; ++pos) {
			for (auto& block : blocks) {
				const ByteArray& bytes = block.*member;
				if (pos < Size(bytes)) {
					output.appendBits(bytes[pos], 8);
				}
			}
		}
	};
	// Data blocks go first, error correction blocks after them.
	appendInterleaved(longestData, &BlockPair::dataBytes);
	appendInterleaved(longestEc, &BlockPair::ecBytes);

	if (numTotalBytes != output.sizeInBytes()) {  // Should be same.
		throw std::invalid_argument("Interleaving error: " + std::to_string(numTotalBytes) + " and " + std::to_string(output.sizeInBytes())
									+ " differ.");
	}
	return output;
}


/**
* Build the matrix with every mask pattern in turn and return the pattern with the lowest
* penalty (the first one wins on ties). "matrix" is used as scratch space and is left
* holding the matrix of the last pattern tried.
*/
static int ChooseMaskPattern(const BitArray& bits, ErrorCorrectionLevel ecLevel, const Version& version, TritMatrix& matrix)
{
	int winner = -1;
	int lowestPenalty = std::numeric_limits<int>::max();  // Lower penalty is better.
	for (int candidate = 0; candidate < NUM_MASK_PATTERNS; ++candidate) {
		BuildMatrix(bits, ecLevel, version, candidate, matrix);
		const int candidatePenalty = MaskUtil::CalculateMaskPenalty(matrix);
		if (candidatePenalty >= lowestPenalty) {
			continue;
		}
		lowestPenalty = candidatePenalty;
		winner = candidate;
	}
	return winner;
}

/**
* @return the size of the header bits plus the character count field (whose width depends on
* "mode" and "version") plus the size of the data bits.
*/
static int CalculateBitsNeeded(CodecMode mode, const BitArray& headerBits, const BitArray& dataBits, const Version& version)
{
	const int lengthFieldBits = CharacterCountBits(mode, version);
	return headerBits.size() + lengthFieldBits + dataBits.size();
}

/**
* Decides the smallest version of QR code that will contain all of the provided data.
* @throws std::invalid_argument (from ChooseVersion) if the data cannot fit in any version
*/
static const Version& RecommendVersion(ErrorCorrectionLevel ecLevel, CodecMode mode, const BitArray& headerBits, const BitArray& dataBits)
{
	// Chicken-and-egg problem: the width of the length field depends on the version, while the
	// version depends on the number of bits. Start by assuming the smallest version, 1 ...
	const Version& smallestVersion = *Version::Model2(1);
	const Version& firstGuess = ChooseVersion(CalculateBitsNeeded(mode, headerBits, dataBits, smallestVersion), ecLevel);

	// ... then redo the calculation with the length field width of that guess. It is not
	// certain that this works in 100% of cases.
	return ChooseVersion(CalculateBitsNeeded(mode, headerBits, dataBits, firstGuess), ecLevel);
}

/**
* Encode "content" as a QR Code.
*
* @param content text to encode
* @param ecLevel error correction level to use
* @param charset character set for byte mode; CharacterSet::Unknown selects the default
*  (ISO8859_1) and suppresses the ECI segment
* @param versionNumber requested version; if it is not positive or Version::Model2() does not
*  know it, the smallest fitting version is chosen
* @param useGs1Format if true, an FNC1-in-first-position mode header is written
* @param maskPattern mask pattern to use, or -1 to pick the one with the lowest penalty
* @return the encode result holding matrix, mode, version, error correction level and mask pattern
* @throws std::invalid_argument if the content cannot be encoded or does not fit
*/
EncodeResult Encode(const std::wstring& content, ErrorCorrectionLevel ecLevel, CharacterSet charset, int versionNumber,
					bool useGs1Format, int maskPattern)
{
	const bool charsetGiven = charset != CharacterSet::Unknown;
	if (!charsetGiven) {
		charset = DEFAULT_BYTE_MODE_ENCODING;
	}

	// A single mode is used for the whole content; no attempt is made to mix several
	// modes / segments, even where that would be more compact.
	const CodecMode mode = ChooseMode(content, charset);

	// Header: optional ECI segment, optional FNC1 header, then the mode indicator.
	BitArray headerBits;
	if (charsetGiven && mode == CodecMode::BYTE) {
		AppendECI(charset, headerBits);
	}
	if (useGs1Format) {
		// GS1 formatted codes are prefixed with a FNC1 in first position mode header.
		AppendModeInfo(CodecMode::FNC1_FIRST_POSITION, headerBits);
	}
	AppendModeInfo(mode, headerBits);

	// The payload of the main segment is collected separately, because its size is needed
	// to settle on a version before the length field can be written.
	BitArray dataBits;
	AppendBytes(content, mode, charset, dataBits);

	// Honor the requested version if it exists, otherwise fall back to the smallest fitting one.
	const Version* version = versionNumber > 0 ? Version::Model2(versionNumber) : nullptr;
	if (version == nullptr) {
		version = &RecommendVersion(ecLevel, mode, headerBits, dataBits);
	}
	else if (!WillFit(CalculateBitsNeeded(mode, headerBits, dataBits, *version), *version, ecLevel)) {
		throw std::invalid_argument("Data too big for requested version");
	}

	// Assemble header, length field and data, in that order.
	BitArray payload;
	payload.appendBitArray(headerBits);
	// Byte mode counts bytes; every other mode counts input characters.
	const int numLetters = mode == CodecMode::BYTE ? dataBits.sizeInBytes() : Size(content);
	AppendLengthInfo(numLetters, *version, mode, payload);
	payload.appendBitArray(dataBits);

	const auto& ecBlocks = version->ecBlocksForLevel(ecLevel);
	const int totalCodewords = version->totalCodewords();
	const int numDataBytes = totalCodewords - ecBlocks.totalCodewords();

	// Terminate and pad to the full data capacity, then interleave with the error correction code.
	TerminateBits(numDataBytes, payload);
	const BitArray finalBits = InterleaveWithECBytes(payload, totalCodewords, numDataBytes, ecBlocks.numBlocks());

	EncodeResult output;
	output.ecLevel = ecLevel;
	output.mode = mode;
	output.version = version;

	// Settle on the mask pattern: the caller's choice, or the best one found by trial.
	const int dimension = version->dimension();
	TritMatrix matrix(dimension, dimension);
	if (maskPattern != -1) {
		output.maskPattern = maskPattern;
	}
	else {
		output.maskPattern = ChooseMaskPattern(finalBits, ecLevel, *version, matrix);
	}

	// Build the final matrix with that mask pattern and store it in the result.
	BuildMatrix(finalBits, ecLevel, *version, output.maskPattern, matrix);
	output.matrix = ToBitMatrix(matrix);

	return output;
}

} // namespace ZXing::QRCode