470 lines
15 KiB
C++
470 lines
15 KiB
C++
/*
|
|
* Copyright 2016 Nu-book Inc.
|
|
* Copyright 2016 ZXing authors
|
|
*/
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
|
|
#include "DMDecoder.h"
|
|
|
|
#include "BitMatrix.h"
|
|
#include "BitSource.h"
|
|
#include "DMBitLayout.h"
|
|
#include "DMDataBlock.h"
|
|
#include "DMVersion.h"
|
|
#include "DecoderResult.h"
|
|
#include "GenericGF.h"
|
|
#include "ReedSolomonDecoder.h"
|
|
#include "ZXAlgorithms.h"
|
|
#include "ZXTestSupport.h"
|
|
|
|
#include <algorithm>
|
|
#include <array>
|
|
#include <optional>
|
|
#include <string>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
namespace ZXing::DataMatrix {
|
|
|
|
/**
|
|
* <p>Data Matrix Codes can encode text as bits in one of several modes, and can use multiple modes
|
|
* in one Data Matrix Code. This class decodes the bits back into text.</p>
|
|
*
|
|
* <p>See ISO 16022:2006, 5.2.1 - 5.2.9.2</p>
|
|
*
|
|
* @author bbrown@google.com (Brian Brown)
|
|
* @author Sean Owen
|
|
*/
|
|
namespace DecodedBitStreamParser {
|
|
|
|
/**
 * See ISO 16022:2006, Annex C Table C.1
 * The C40 Basic Character Set (*'s used for placeholders for the shift values)
 */
static const char C40_BASIC_SET_CHARS[] = {
	'*', '*', '*', ' ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
	'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
	'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
};

/**
 * The C40 Shift 2 set. The last entry stands for FNC1, which is represented as ASCII 29.
 */
static const char C40_SHIFT2_SET_CHARS[] = {
	'!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.',
	'/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '_', 29 // FNC1->29
};

/**
 * See ISO 16022:2006, Annex C Table C.2
 * The Text Basic Character Set (*'s used for placeholders for the shift values)
 */
static const char TEXT_BASIC_SET_CHARS[] = {
	'*', '*', '*', ' ', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
	'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
	'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'
};

// Shift 2 for Text is the same encoding as C40.
// A typed alias (instead of a preprocessor macro) keeps the name inside this namespace.
static constexpr auto& TEXT_SHIFT2_SET_CHARS = C40_SHIFT2_SET_CHARS;

/**
 * The Text Shift 3 set.
 */
static const char TEXT_SHIFT3_SET_CHARS[] = {
	'`', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
	'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '{', '|', '}', '~', 127
};
|
|
|
|
// One-shot "upper shift" helper: while `set` is true, the next value passed through operator() is raised
// by 128 and the flag is cleared again; otherwise the value is passed through unchanged.
struct Shift128
{
	bool set = false;

	char operator()(int val)
	{
		const int offset = set ? 128 : 0;
		set = false; // the shift applies to a single value only
		return static_cast<char>(val + offset);
	}
};
|
|
|
|
/**
|
|
* See ISO 16022:2006, 5.4.1, Table 6
|
|
*/
|
|
static ECI ParseECIValue(BitSource& bits)
|
|
{
|
|
int firstByte = bits.readBits(8);
|
|
if (firstByte <= 127)
|
|
return ECI(firstByte - 1);
|
|
|
|
int secondByte = bits.readBits(8);
|
|
if (firstByte <= 191)
|
|
return ECI((firstByte - 128) * 254 + 127 + secondByte - 1);
|
|
|
|
int thirdByte = bits.readBits(8);
|
|
|
|
return ECI((firstByte - 192) * 64516 + 16383 + (secondByte - 1) * 254 + thirdByte - 1);
|
|
}
|
|
|
|
/**
|
|
* See ISO 16022:2006, 5.6
|
|
*/
|
|
static void ParseStructuredAppend(BitSource& bits, StructuredAppendInfo& sai)
|
|
{
|
|
// 5.6.2 Table 8
|
|
int symbolSequenceIndicator = bits.readBits(8);
|
|
sai.index = symbolSequenceIndicator >> 4;
|
|
sai.count = 17 - (symbolSequenceIndicator & 0x0F); // 2-16 permitted, 17 invalid
|
|
|
|
if (sai.count == 17 || sai.count <= sai.index) // If info doesn't make sense
|
|
sai.count = 0; // Choose to mark count as unknown
|
|
|
|
int fileId1 = bits.readBits(8); // File identification 1
|
|
int fileId2 = bits.readBits(8); // File identification 2
|
|
|
|
// There's no conversion method or meaning given to the 2 file id codewords in Section 5.6.3, apart from
|
|
// saying that each value should be 1-254. Choosing here to represent them as base 256.
|
|
sai.id = std::to_string((fileId1 << 8) | fileId2);
|
|
}
|
|
|
|
std::optional<std::array<int, 3>> DecodeNextTriple(BitSource& bits)
|
|
{
|
|
// Values are encoded in a 16-bit value as (1600 * C1) + (40 * C2) + C3 + 1
|
|
// If there is less than 2 bytes left or the next byte is the unlatch codeword then the current segment has ended
|
|
if (bits.available() < 16)
|
|
return {};
|
|
int firstByte = bits.readBits(8);
|
|
if (firstByte == 254) // Unlatch codeword
|
|
return {};
|
|
|
|
int fullBitValue = (firstByte << 8) + bits.readBits(8) - 1;
|
|
int a = fullBitValue / 1600;
|
|
fullBitValue -= a * 1600;
|
|
int b = fullBitValue / 40;
|
|
int c = fullBitValue - b * 40;
|
|
|
|
return {{a, b, c}};
|
|
}
|
|
|
|
// Selects which character tables DecodeC40OrTextSegment() decodes with.
enum class Mode
{
	C40,
	TEXT
};
|
|
|
|
/**
|
|
* See ISO 16022:2006, 5.2.5 and Annex C, Table C.1 (C40)
|
|
* See ISO 16022:2006, 5.2.6 and Annex C, Table C.2 (Text)
|
|
*/
|
|
static void DecodeC40OrTextSegment(BitSource& bits, Content& result, Mode mode)
|
|
{
|
|
// TODO(bbrown): The Upper Shift with C40 doesn't work in the 4 value scenario all the time
|
|
Shift128 upperShift;
|
|
int shift = 0;
|
|
|
|
const char* BASIC_SET_CHARS = mode == Mode::C40 ? C40_BASIC_SET_CHARS : TEXT_BASIC_SET_CHARS;
|
|
const char* SHIFT_SET_CHARS = mode == Mode::C40 ? C40_SHIFT2_SET_CHARS : TEXT_SHIFT2_SET_CHARS;
|
|
|
|
while (auto triple = DecodeNextTriple(bits)) {
|
|
for (int cValue : *triple) {
|
|
switch (std::exchange(shift, 0)) {
|
|
case 0:
|
|
if (cValue < 3)
|
|
shift = cValue + 1;
|
|
else if (cValue < 40) // Size(BASIC_SET_CHARS)
|
|
result.push_back(upperShift(BASIC_SET_CHARS[cValue]));
|
|
else
|
|
throw FormatError("invalid value in C40 or Text segment");
|
|
break;
|
|
case 1: result.push_back(upperShift(cValue)); break;
|
|
case 2:
|
|
if (cValue < 28) // Size(SHIFT_SET_CHARS))
|
|
result.push_back(upperShift(SHIFT_SET_CHARS[cValue]));
|
|
else if (cValue == 30) // Upper Shift
|
|
upperShift.set = true;
|
|
else
|
|
throw FormatError("invalid value in C40 or Text segment");
|
|
break;
|
|
case 3:
|
|
if (mode == Mode::C40)
|
|
result.push_back(upperShift(cValue + 96));
|
|
else if (cValue < Size(TEXT_SHIFT3_SET_CHARS))
|
|
result.push_back(upperShift(TEXT_SHIFT3_SET_CHARS[cValue]));
|
|
else
|
|
throw FormatError("invalid value in C40 or Text segment");
|
|
break;
|
|
default: throw FormatError("invalid value in C40 or Text segment"); ;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* See ISO 16022:2006, 5.2.7
|
|
*/
|
|
static void DecodeAnsiX12Segment(BitSource& bits, Content& result)
|
|
{
|
|
while (auto triple = DecodeNextTriple(bits)) {
|
|
for (int cValue : *triple) {
|
|
// X12 segment terminator <CR>, separator *, sub-element separator >, space
|
|
static const char segChars[4] = {'\r', '*', '>', ' '};
|
|
if (cValue < 0)
|
|
throw FormatError("invalid value in AnsiX12 segment");
|
|
else if (cValue < 4)
|
|
result.push_back(segChars[cValue]);
|
|
else if (cValue < 14) // 0 - 9
|
|
result.push_back((char)(cValue + 44));
|
|
else if (cValue < 40) // A - Z
|
|
result.push_back((char)(cValue + 51));
|
|
else
|
|
throw FormatError("invalid value in AnsiX12 segment");
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* See ISO 16022:2006, 5.2.8 and Annex C Table C.3
|
|
*/
|
|
static void DecodeEdifactSegment(BitSource& bits, Content& result)
|
|
{
|
|
// If there are less than 3 bytes left then it will be encoded as ASCII
|
|
while (bits.available() >= 24) {
|
|
for (int i = 0; i < 4; i++) {
|
|
char edifactValue = bits.readBits(6);
|
|
|
|
// Check for the unlatch character
|
|
if (edifactValue == 0x1F) { // 011111
|
|
// Read rest of byte, which should be 0, and stop
|
|
if (bits.bitOffset())
|
|
bits.readBits(8 - bits.bitOffset());
|
|
return;
|
|
}
|
|
|
|
if ((edifactValue & 0x20) == 0) // no 1 in the leading (6th) bit
|
|
edifactValue |= 0x40; // Add a leading 01 to the 6 bit binary value
|
|
result.push_back(edifactValue);
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
 * Reverses the position dependent randomization of a Base 256 codeword.
 *
 * See ISO 16022:2006, Annex B, B.2
 *
 * @param randomizedBase256Codeword codeword value as read from the symbol
 * @param base256CodewordPosition position of that codeword, used to derive the pseudo random number
 * @return the codeword minus the pseudo random number, wrapped by adding 256 if the difference is negative
 */
static int Unrandomize255State(int randomizedBase256Codeword, int base256CodewordPosition)
{
	const int pseudoRandom = (149 * base256CodewordPosition) % 255 + 1;
	const int difference = randomizedBase256Codeword - pseudoRandom;
	if (difference < 0)
		return difference + 256;
	return difference;
}
|
|
|
|
/**
|
|
* See ISO 16022:2006, 5.2.9 and Annex B, B.2
|
|
*/
|
|
static void DecodeBase256Segment(BitSource& bits, Content& result)
|
|
{
|
|
// Figure out how long the Base 256 Segment is.
|
|
int codewordPosition = 1 + bits.byteOffset(); // position is 1-indexed
|
|
int d1 = Unrandomize255State(bits.readBits(8), codewordPosition++);
|
|
int count;
|
|
if (d1 == 0) // Read the remainder of the symbol
|
|
count = bits.available() / 8;
|
|
else if (d1 < 250)
|
|
count = d1;
|
|
else
|
|
count = 250 * (d1 - 249) + Unrandomize255State(bits.readBits(8), codewordPosition++);
|
|
|
|
// We're seeing NegativeArraySizeException errors from users.
|
|
if (count < 0)
|
|
throw FormatError("invalid count in Base256 segment");
|
|
|
|
result.reserve(count);
|
|
for (int i = 0; i < count; i++) {
|
|
// readBits(8) may fail, have seen this particular error in the wild, such as at
|
|
// http://www.bcgen.com/demo/IDAutomationStreamingDataMatrix.aspx?MODE=3&D=Fred&PFMT=3&PT=F&X=0.3&O=0&LM=0.2
|
|
result += narrow_cast<uint8_t>(Unrandomize255State(bits.readBits(8), codewordPosition++));
|
|
}
|
|
}
|
|
|
|
// Decodes the data codewords in bytes into a DecoderResult (content, symbology identifier, structured
// append info, reader init flag). Only the first error is reported; decoding continues after an invalid
// codeword where possible and stops at the first error thrown by a segment decoder.
// isDMRE adds 6 to the symbology modifier.
// See ISO 16022:2006, 5.2.3 and Annex C, Table C.2
ZXING_EXPORT_TEST_ONLY
DecoderResult Decode(ByteArray&& bytes, const bool isDMRE)
{
	BitSource bits(bytes);
	Content result;
	Error error;
	result.symbology = {'d', '1', 3}; // ECC 200 (ISO 16022:2006 Annex N Table N.1)
	std::string resultTrailer;

	struct StructuredAppendInfo sai;
	bool readerInit = false;
	bool firstCodeword = true;
	bool sawPad = false;
	int firstFNC1Position = 1;
	Shift128 upperShift;

	// return only the first error but keep on decoding if possible
	auto noteError = [&error](Error&& e) {
		if (!error)
			error = std::move(e);
	};

	try {
		while (!sawPad && bits.available() >= 8) {
			const int codeword = bits.readBits(8);
			switch (codeword) {
			case 0: noteError(FormatError("invalid 0 code word")); break;
			case 129: sawPad = true; break; // Pad -> we are done, ignore the rest of the bits
			case 230: DecodeC40OrTextSegment(bits, result, Mode::C40); break;
			case 231: DecodeBase256Segment(bits, result); break;
			case 232: { // FNC1
				// Only recognizing an FNC1 as first/second by codeword position (aka symbol character position), not
				// by decoded character position, i.e. not recognizing a C40/Text encoded FNC1 (which requires a latch
				// and a shift)
				const auto position = bits.byteOffset();
				if (position == firstFNC1Position)
					result.symbology.modifier = '2'; // GS1
				else if (position == firstFNC1Position + 1)
					result.symbology.modifier = '3'; // AIM, note no AIM Application Indicator format defined, ISO 16022:2006 11.2
				else
					result.push_back(static_cast<char>(29)); // translate as ASCII 29 <GS>
				break;
			}
			case 233: // Structured Append
				if (!firstCodeword) // Must be first ISO 16022:2006 5.6.1
					noteError(FormatError("structured append tag must be first code word"));
				ParseStructuredAppend(bits, sai);
				firstFNC1Position = 5;
				break;
			case 234: // Reader Programming
				if (!firstCodeword) // Must be first ISO 16022:2006 5.2.4.9
					noteError(FormatError("reader programming tag must be first code word"));
				readerInit = true;
				break;
			case 235: upperShift.set = true; break; // Upper Shift (shift to Extended ASCII)
			case 236: // ISO 15434 format "05" Macro
				result.append("[)>\x1E" "05\x1D");
				resultTrailer.insert(0, "\x1E\x04");
				break;
			case 237: // ISO 15434 format "06" Macro
				result.append("[)>\x1E" "06\x1D");
				resultTrailer.insert(0, "\x1E\x04");
				break;
			case 238: DecodeAnsiX12Segment(bits, result); break;
			case 239: DecodeC40OrTextSegment(bits, result, Mode::TEXT); break;
			case 240: DecodeEdifactSegment(bits, result); break;
			case 241: result.switchEncoding(ParseECIValue(bits)); break;
			default:
				if (codeword <= 128) // ASCII data (ASCII value + 1)
					result.push_back(upperShift(codeword) - 1);
				else if (codeword <= 229) // 2-digit data 00-99 (Numeric Value + 130)
					result.append(ToString(codeword - 130, 2));
				else if (codeword >= 242 && !(codeword == 254 && bits.available() == 0)) {
					// Not to be used in ASCII encodation. The exception made above works around encoders that
					// use unlatch to ASCII as last code word (ask upstream).
					noteError(FormatError("invalid code word"));
				}
				break;
			}
			firstCodeword = false;
		}
	} catch (Error e) {
		noteError(std::move(e));
	}

	result.append(resultTrailer);
	result.symbology.aiFlag = result.symbology.modifier == '2' ? AIFlag::GS1 : AIFlag::None;
	result.symbology.modifier += isDMRE * 6;

	return DecoderResult(std::move(result))
		.setError(std::move(error))
		.setStructuredAppend(sai)
		.setReaderInit(readerInit);
}
|
|
|
|
} // namespace DecodedBitStreamParser
|
|
|
|
/**
|
|
* <p>Given data and error-correction codewords received, possibly corrupted by errors, attempts to
|
|
* correct the errors in-place using Reed-Solomon error correction.</p>
|
|
*
|
|
* @param codewordBytes data and error correction codewords
|
|
* @param numDataCodewords number of codewords that are data bytes
|
|
* @return false if error correction fails
|
|
*/
|
|
static bool
|
|
CorrectErrors(ByteArray& codewordBytes, int numDataCodewords)
|
|
{
|
|
// First read into an array of ints
|
|
std::vector<int> codewordsInts(codewordBytes.begin(), codewordBytes.end());
|
|
int numECCodewords = Size(codewordBytes) - numDataCodewords;
|
|
|
|
if (!ReedSolomonDecode(GenericGF::DataMatrixField256(), codewordsInts, numECCodewords))
|
|
return false;
|
|
|
|
// Copy back into array of bytes -- only need to worry about the bytes that were data
|
|
// We don't care about errors in the error-correction codewords
|
|
std::copy_n(codewordsInts.begin(), numDataCodewords, codewordBytes.begin());
|
|
|
|
return true;
|
|
}
|
|
|
|
// Decodes the symbol in bits: determines the version from the matrix dimensions, reads the codewords,
// error-corrects every data block and hands the de-interlaced data bytes to the bit stream parser.
// Returns a format error if version, codewords or data blocks cannot be determined and a checksum error
// if the error correction of a data block fails.
static DecoderResult DoDecode(const BitMatrix& bits)
{
	// Construct a parser and read version, error-correction level
	const Version* version = VersionForDimensionsOf(bits);
	if (version == nullptr)
		return FormatError("Invalid matrix dimension");

	// Read codewords
	ByteArray codewords = CodewordsFromBitMatrix(bits, *version);
	if (codewords.empty())
		return FormatError("Invalid number of code words");

	// A second pass (fix259 == true) is only made for version 24 symbols whose first pass failed the
	// error correction, see https://github.com/zxing-cpp/zxing-cpp/issues/259
	for (bool fix259 = false;; fix259 = true) {
		// Separate into data blocks
		std::vector<DataBlock> dataBlocks = GetDataBlocks(codewords, *version, fix259);
		if (dataBlocks.empty())
			return FormatError("Invalid number of data blocks");

		// Count total number of data bytes
		ByteArray resultBytes(TransformReduce(dataBlocks, 0, [](const auto& db) { return db.numDataCodewords; }));

		// Error-correct and copy data blocks together into a stream of bytes
		const int dataBlocksCount = Size(dataBlocks);
		bool allCorrected = true;
		for (int j = 0; j < dataBlocksCount; ++j) {
			auto& [numDataCodewords, blockCodewords] = dataBlocks[j];
			if (!CorrectErrors(blockCodewords, numDataCodewords)) {
				allCorrected = false;
				break;
			}

			// De-interlace data blocks.
			for (int i = 0; i < numDataCodewords; ++i)
				resultBytes[i * dataBlocksCount + j] = blockCodewords[i];
		}

		if (!allCorrected) {
			if (version->versionNumber == 24 && !fix259)
				continue; // retry with the alternative block layout
			return ChecksumError();
		}

#ifdef PRINT_DEBUG
		if (fix259)
			printf("-> needed retry with fix259 for 144x144 symbol\n");
#endif

		// Decode the contents of that stream of bytes
		return DecodedBitStreamParser::Decode(std::move(resultBytes), version->isDMRE())
			.setVersionNumber(version->versionNumber);
	}
}
|
|
|
|
// Returns a new matrix, constructed with the input's height and width swapped, in which the module at
// (x, y) is the input module at (width - 1 - y, height - 1 - x).
static BitMatrix FlippedL(const BitMatrix& bits)
{
	const int srcWidth = bits.width();
	const int srcHeight = bits.height();

	BitMatrix flipped(srcHeight, srcWidth);
	for (int y = 0; y < flipped.height(); ++y) {
		for (int x = 0; x < flipped.width(); ++x)
			flipped.set(x, y, bits.get(srcWidth - 1 - y, srcHeight - 1 - x));
	}
	return flipped;
}
|
|
|
|
// Decodes the symbol in bits. If that does not give a valid result, the flipped matrix (see FlippedL) is
// tried as well and its result is returned, marked as mirrored, unless it failed with a checksum error.
// In that case the result of the first attempt is returned.
DecoderResult Decode(const BitMatrix& bits)
{
	auto res = DoDecode(bits);

	//TODO:
	// * unify bit mirroring helper code with QRReader?
	// * rectangular symbols with the a size of 8 x Y are not supported a.t.m.
	if (!res.isValid()) {
		auto mirroredRes = DoDecode(FlippedL(bits));
		if (mirroredRes.error().type() != Error::Checksum) {
			mirroredRes.setIsMirrored(true);
			return mirroredRes;
		}
	}

	return res;
}
|
|
|
|
} // namespace ZXing::DataMatrix
|