Passport MRZ Recognition with Dynamsoft C++ OCR SDK

MRZ stands for machine-readable zone. A passport's machine-readable zone encodes the holder's name, nationality, passport number, date of birth, sex, and the passport's expiration date. This article shows how to use the Dynamsoft OCR SDK to recognize the MRZ and parse the corresponding information.

About Dynamsoft OCR SDK

Prerequisites

Windows

Linux

sudo apt install libopencv-dev cmake

Passport MRZ Recognition in C++

In the following paragraphs, we first use the Dynamsoft OCR SDK to locate the machine-readable zone of a passport and recognize its text, and then extract the individual fields from that text according to the standard format of passport booklets.

Setting up CMake project

I strongly recommend installing the CMake extension for Visual Studio Code to create and debug the CMake project on both Windows and Linux.

# Build script for the "mrz" sample: links mrzcv.cpp against the Dynamsoft Label
# Recognition library and OpenCV, then copies the runtime files next to the binary.
#
# The minimum version is 3.10 rather than 2.6: CMake 4.x refuses to configure
# projects that request compatibility with versions older than 3.5, and the
# $<TARGET_FILE_DIR:...> generator expressions used below are not a 2.6 feature.
cmake_minimum_required (VERSION 3.10)
project (mrz)
MESSAGE( STATUS "PROJECT_NAME: " ${PROJECT_NAME} )

# Check platforms
if (CMAKE_HOST_WIN32)
    set(WINDOWS 1)
elseif(CMAKE_HOST_UNIX)
    set(LINUX 1)
endif()

# Add search path for include and lib files
MESSAGE( STATUS "CPU architecture ${CMAKE_SYSTEM_PROCESSOR}" )
if(WINDOWS)
    link_directories("${PROJECT_SOURCE_DIR}/platform/windows/lib/")
elseif(LINUX)
    link_directories("${PROJECT_SOURCE_DIR}/platform/linux/")
endif()
include_directories("${PROJECT_BINARY_DIR}" "${PROJECT_SOURCE_DIR}/include/")

# Add the executable
find_package(OpenCV REQUIRED)
add_executable(${PROJECT_NAME} mrzcv.cpp)
if(WINDOWS)
    # The Windows import library carries an "x64" suffix.
    target_link_libraries (${PROJECT_NAME} "DynamsoftLabelRecognitionx64" ${OpenCV_LIBS})
else()
    target_link_libraries (${PROJECT_NAME} "DynamsoftLabelRecognition" ${OpenCV_LIBS})
endif()

# Copy DLLs
if(WINDOWS)
    add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
        COMMAND ${CMAKE_COMMAND} -E copy_directory
        "${PROJECT_SOURCE_DIR}/platform/windows/bin/"
        $<TARGET_FILE_DIR:${PROJECT_NAME}>)
endif()

# Copy template
add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
    COMMAND ${CMAKE_COMMAND} -E copy_directory
    "${PROJECT_SOURCE_DIR}/template/"
    $<TARGET_FILE_DIR:${PROJECT_NAME}>)

# Copy model files (kept in a CharacterModel sub-directory of the output folder)
add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD
    COMMAND ${CMAKE_COMMAND} -E copy_directory
    "${PROJECT_SOURCE_DIR}/CharacterModel"
    $<TARGET_FILE_DIR:${PROJECT_NAME}>/CharacterModel)

Coding for MRZ detection and information parsing

Once the build configuration is done, we can move to the code part.

// Create the recognizer object and pass it the license string
// ("LICENSE-KEY" is presumably a placeholder for a real key — confirm).
CLabelRecognition dlr;  dlr.InitLicense("LICENSE-KEY");
// Load recognition settings from a template file; the returned status is stored
// in `ret`. NOTE(review): neither return value is checked in this excerpt.
int ret = dlr.AppendSettingsFromFile("template-file");
"CharacterModelArray" : [
{
"DirectoryPath": "CharacterModel",
"FilterFilePath": "",
"Name": "NumberUppercase"
}
],
// Time the recognition call: the tick meter is started right before
// RecognizeByFile and stopped right after it.
TickMeter tm;
tm.start();
// `errorCode` and `pszImageFile` are declared outside this excerpt.
// "locr" is presumably the name of a template defined in the loaded settings — confirm.
errorCode = dlr.RecognizeByFile(pszImageFile, "locr");
tm.stop();
// Elapsed time as reported by getTimeSec(), stored as a float.
float costTime = tm.getTimeSec();
// Fetch all recognition results, dump every recognized text line together with
// the four corner points of its location, then release the result array.
DLRResultArray* pDLRResults = NULL;
dlr.GetAllDLRResults(&pDLRResults);
if (pDLRResults == NULL)
{
    // A NULL array is reported as "nothing detected".
    printf("\r\nNo data detected.\r\n");
}
else
{
    int rCount = pDLRResults->resultsCount;
    printf("\r\nRecognized %d results\r\n", rCount);
    for (int ri = 0; ri < rCount; ri++)
    {
        printf("\r\nResult %d :\r\n", ri);
        // Not used within this excerpt — presumably consumed by drawing code elsewhere.
        int startX = 50, startY = 50;
        DLRResult* result = pDLRResults->results[ri];
        int lCount = result->lineResultsCount;
        for (int li = 0; li < lCount; li++)
        {
            DLRPoint *points = result->lineResults[li]->location.points;
            printf("Line result %d: %s\r\n", li, result->lineResults[li]->text);

            // Emit "x1: .., y1: .., x2: .., ..., y4: .." one corner at a time.
            for (int corner = 0; corner < 4; corner++)
            {
                printf("%sx%d: %d, y%d: %d", (corner == 0) ? "" : ", ",
                       corner + 1, points[corner].x, corner + 1, points[corner].y);
            }
            printf("\r\n");
        }
    }
}
// Called on both paths, including when the array pointer is still NULL.
dlr.FreeDLRResults(&pDLRResults);
string line1 = result->lineResults[0]->text;
string line2 = result->lineResults[1]->text;
// https://en.wikipedia.org/wiki/Machine-readable_passport
// Type
string tmp = "Type: ";
tmp.insert(tmp.length(), 1, line1[0]);
printf("%s\r\n", tmp.c_str());

// Issuing country
tmp = "Issuing country: "; line1.substr(2, 5);
tmp += line1.substr(2, 3);
printf("%s\r\n", tmp.c_str());

// Surname
int index = 5;
tmp = "Surname: ";
for (; index < 44; index++)
{
if (line1[index] != '<')
{
tmp.insert(tmp.length(), 1, line1[index]);
}
else
{
break;
}
}
printf("%s\r\n", tmp.c_str());

// Given names
tmp = "Given Names: ";
index += 2;
for (; index < 44; index++)
{
if (line1[index] != '<')
{
tmp.insert(tmp.length(), 1, line1[index]);
}
else
{
tmp.insert(tmp.length(), 1, ' ');
}
}
printf("%s\r\n", tmp.c_str());

// Passport number
tmp = "Passport number: ";
index = 0;
for (; index < 9; index++)
{
if (line2[index] != '<')
{
tmp.insert(tmp.length(), 1, line2[index]);
}
else
{
break;
}
}
printf("%s\r\n", tmp.c_str());

// Nationality
tmp = "Nationality: ";
tmp += line2.substr(10, 3);
printf("%s\r\n", tmp.c_str());

// Date of birth
tmp = line2.substr(13, 6);
tmp.insert(2, "/");
tmp.insert(5, "/");
tmp = "Date of birth (YYMMDD): " + tmp;
printf("%s\r\n", tmp.c_str());

// Sex
tmp = "Sex: ";
tmp.insert(tmp.length(), 1, line2[20]);
printf("%s\r\n", tmp.c_str());

// Expiration date of passport
tmp = line2.substr(21, 6);
tmp.insert(2, "/");
tmp.insert(5, "/");
tmp = "Expiration date of passport (YYMMDD): " + tmp;
printf("%s\r\n", tmp.c_str());

// Personal number
if (line2[28] != '<')
{
tmp = "Personal number: ";
for (index = 28; index < 42; index++)
{
if (line2[index] != '<')
{
tmp.insert(tmp.length(), 1, line2[index]);
}
else
{
break;
}
}
printf("%s\r\n", tmp.c_str());
}
line( ori, Point(x1, y1), Point(x2, y2), lineColor, thickness);
line( ori, Point(x2, y2), Point(x3, y3), lineColor, thickness);
line( ori, Point(x3, y3), Point(x4, y4), lineColor, thickness);
line( ori, Point(x4, y4), Point(x1, y1), lineColor, thickness);
drawText(ori, result->lineResults[li]->text, minX, minY - scale * 10);

imshow("Passport MRZ Recognition", ori);
hconcat(before, after, newMat);
imshow("Comparison", newMat);

Source Code

https://github.com/yushulx/passport-mrz-recognition

Manager of Dynamsoft Open Source Projects | Tech Lover

Manager of Dynamsoft Open Source Projects | Tech Lover