How to Build Python MRZ Scanner SDK and Publish It to PyPI

Implementing Python MRZ Scanner SDK in C/C++

Here is the structure of the Python MRZ extension project.

  • initLicense(): Initialize the global license key.
  • createInstance(): Create an instance of DynamsoftMrzReader class.
#include <Python.h>
#include <stdio.h>
#include "dynamsoft_mrz_reader.h"
#define INITERROR return NULL

/* Per-module state record read through GETSTATE(); error_out() raises the
 * exception type stored in `error`. */
struct module_state {
PyObject *error; /* exception type handed to PyErr_SetString() by error_out() */
};

#define GETSTATE(m) ((struct module_state*)PyModule_GetState(m))

/*
 * Raise a generic "something bad happened" error on behalf of module `m`.
 *
 * The exception type comes from the module's state record when one exists.
 * PyModule_GetState() returns NULL for a module created without per-module
 * state (m_size <= 0, which is how mrzscanner_module_def is declared), so fall
 * back to RuntimeError instead of dereferencing a NULL pointer.
 *
 * Always returns NULL with a Python exception set.
 */
static PyObject *
error_out(PyObject *m)
{
    struct module_state *st = GETSTATE(m);
    PyObject *exc_type = (st != NULL && st->error != NULL) ? st->error : PyExc_RuntimeError;

    PyErr_SetString(exc_type, "something bad happened");
    return NULL;
}

/*
 * mrzscanner.createInstance() -> DynamsoftMrzReader
 *
 * Allocates a DynamsoftMrzReader object and stores the handle returned by
 * DLR_CreateInstance() in it.
 *
 * Returns a new reference, or NULL with a Python exception set when the type
 * cannot be readied or the object cannot be allocated.
 */
static PyObject *createInstance(PyObject *obj, PyObject *args)
{
    if (PyType_Ready(&DynamsoftMrzReaderType) < 0)
        INITERROR;

    DynamsoftMrzReader *reader = PyObject_New(DynamsoftMrzReader, &DynamsoftMrzReaderType);
    if (reader == NULL)
    {
        /* Allocation failed; PyObject_New() has already set the exception. */
        return NULL;
    }

    reader->handler = DLR_CreateInstance();
    return (PyObject *)reader;
}

/*
 * mrzscanner.initLicense(license: str) -> int
 *
 * Passes the license string to DLR_InitLicense(), prints the message the SDK
 * wrote into the error buffer, and returns the SDK's integer status code.
 *
 * Returns NULL (with the exception set by PyArg_ParseTuple) when the argument
 * is not a single string.
 */
static PyObject *initLicense(PyObject *obj, PyObject *args)
{
    char *pszLicense;
    if (!PyArg_ParseTuple(args, "s", &pszLicense))
    {
        return NULL;
    }

    /* Zero-filled so the printf below never reads indeterminate bytes if the
     * SDK leaves the buffer untouched. */
    char errorMsgBuffer[512] = {0};
    // Click https://www.dynamsoft.com/customer/license/trialLicense/?product=dlr to get a trial license.
    int ret = DLR_InitLicense(pszLicense, errorMsgBuffer, (int)sizeof(errorMsgBuffer));
    printf("DLR_InitLicense: %s\n", errorMsgBuffer);

    return Py_BuildValue("i", ret);
}

/* Module-level functions exposed by the mrzscanner extension.
 * Each entry is {Python name, C function, calling convention, docstring};
 * the all-NULL entry marks the end of the table. */
static PyMethodDef mrzscanner_methods[] = {
{"initLicense", initLicense, METH_VARARGS, "Set license to activate the SDK"},
{"createInstance", createInstance, METH_VARARGS, "Create Dynamsoft MRZ Reader object"},
{NULL, NULL, 0, NULL}
};

/* Module definition consumed by PyModule_Create() in PyInit_mrzscanner().
 * Fields are positional: m_base, m_name, m_doc, m_size, m_methods. */
static struct PyModuleDef mrzscanner_module_def = {
PyModuleDef_HEAD_INIT,
"mrzscanner", /* m_name */
"Internal \"mrzscanner\" module", /* m_doc */
-1, /* m_size: -1 means no per-module state block is allocated */
mrzscanner_methods /* m_methods */
};

/*
 * Module initialization entry point for `import mrzscanner`.
 *
 * Creates the module, readies and registers the DynamsoftMrzReader and
 * MrzResult types, and publishes the SDK version string as
 * `mrzscanner.version`.
 *
 * Returns the new module, or NULL with a Python exception set. On every
 * failure path the partially built module is released. PyModule_AddObject()
 * steals the reference to the value only on success, so the extra type
 * reference is dropped by hand when it fails.
 */
PyMODINIT_FUNC PyInit_mrzscanner(void)
{
    PyObject *module = PyModule_Create(&mrzscanner_module_def);
    if (module == NULL)
        INITERROR;

    if (PyType_Ready(&DynamsoftMrzReaderType) < 0)
    {
        Py_DECREF(module);
        INITERROR;
    }

    Py_INCREF(&DynamsoftMrzReaderType);
    if (PyModule_AddObject(module, "DynamsoftMrzReader", (PyObject *)&DynamsoftMrzReaderType) < 0)
    {
        Py_DECREF(&DynamsoftMrzReaderType);
        Py_DECREF(module);
        INITERROR;
    }

    if (PyType_Ready(&MrzResultType) < 0)
    {
        Py_DECREF(module);
        INITERROR;
    }

    Py_INCREF(&MrzResultType);
    if (PyModule_AddObject(module, "MrzResult", (PyObject *)&MrzResultType) < 0)
    {
        Py_DECREF(&MrzResultType);
        Py_DECREF(module);
        INITERROR;
    }

    if (PyModule_AddStringConstant(module, "version", DLR_GetVersion()) < 0)
    {
        Py_DECREF(module);
        INITERROR;
    }

    return module;
}
  • decodeFile(): Recognize MRZ from an image file.
  • decodeMat(): Recognize MRZ from OpenCV Mat.
  • loadModel(): Load the MRZ model by parsing a JSON-formatted configuration file.
#ifndef __MRZ_READER_H__
#define __MRZ_READER_H__

#include <Python.h>
#include <structmember.h>
#include "DynamsoftLabelRecognizer.h"
#include "mrz_result.h"

#define DEBUG 0

/* Python object wrapping one recognizer instance. */
typedef struct
{
PyObject_HEAD
void *handler; /* handle from DLR_CreateInstance(); released with DLR_DestroyInstance() */
} DynamsoftMrzReader;

/*
 * Release the native recognizer handle owned by `self`, if any, and reset it
 * to NULL so a second call is harmless. Always returns 0.
 */
static int DynamsoftMrzReader_clear(DynamsoftMrzReader *self)
{
    if (self->handler == NULL)
        return 0;

    DLR_DestroyInstance(self->handler);
    self->handler = NULL;
    return 0;
}

/*
 * tp_dealloc slot: drop the native handle first, then hand the object's
 * memory back through the type's tp_free.
 */
static void DynamsoftMrzReader_dealloc(DynamsoftMrzReader *self)
{
    PyObject *as_object = (PyObject *)self;

    DynamsoftMrzReader_clear(self);
    Py_TYPE(as_object)->tp_free(as_object);
}

/*
 * tp_new slot: allocate a DynamsoftMrzReader through the type's tp_alloc and
 * attach a handle from DLR_CreateInstance().
 *
 * `args` and `kwds` are not used. Returns NULL when the allocation fails.
 */
static PyObject *DynamsoftMrzReader_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    DynamsoftMrzReader *reader = (DynamsoftMrzReader *)type->tp_alloc(type, 0);
    if (reader == NULL)
        return NULL;

    reader->handler = DLR_CreateInstance();
    return (PyObject *)reader;
}

/* Methods available on DynamsoftMrzReader instances (wired in through
 * tp_methods of DynamsoftMrzReaderType). The all-NULL entry marks the end of
 * the table. */
static PyMethodDef instance_methods[] = {
{"decodeFile", decodeFile, METH_VARARGS, NULL},
{"decodeMat", decodeMat, METH_VARARGS, NULL},
{"loadModel", loadModel, METH_VARARGS, NULL},
{NULL, NULL, 0, NULL}
};

/* Type object for mrzscanner.DynamsoftMrzReader.
 * The initializer is positional, so the order of the entries must follow the
 * PyTypeObject slot layout; unused slots are left as 0. Only tp_dealloc,
 * the generic attribute accessors, tp_methods and tp_new are populated.
 * Note that tp_clear is left empty: DynamsoftMrzReader_clear() is called
 * directly from the deallocator. */
static PyTypeObject DynamsoftMrzReaderType = {
PyVarObject_HEAD_INIT(NULL, 0) "mrzscanner.DynamsoftMrzReader", /* tp_name */
sizeof(DynamsoftMrzReader), /* tp_basicsize */
0, /* tp_itemsize */
(destructor)DynamsoftMrzReader_dealloc, /* tp_dealloc */
0, /* tp_print (slot is tp_vectorcall_offset since Python 3.8) */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_reserved */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
PyObject_GenericGetAttr, /* tp_getattro */
PyObject_GenericSetAttr, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
"DynamsoftMrzReader", /* tp_doc */
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
instance_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
DynamsoftMrzReader_new, /* tp_new */
};

#endif
{
"CharacterModelArray" : [
{
"DirectoryPath": "model",
"FilterFilePath": "",
"Name": "MRZ"
}
],
"LabelRecognizerParameterArray" : [
{
"BinarizationModes" : [
{
"BlockSizeX" : 0,
"BlockSizeY" : 0,
"EnableFillBinaryVacancy" : 1,
"LibraryFileName" : "",
"LibraryParameters" : "",
"Mode" : "BM_LOCAL_BLOCK",
"ThreshValueCoefficient" : 15
}
],
"CharacterModelName" : "MRZ",
"LetterHeightRange" : [ 5, 1000, 1 ],
"LineStringLengthRange" : [30, 44],
"MaxLineCharacterSpacing" : 130,
"LineStringRegExPattern" : "([ACI][A-Z<][A-Z<]{3}[A-Z0-9<]{9}[0-9][A-Z0-9<]{15}){(30)}|([0-9]{2}[(01-12)][(01-31)][0-9][MF<][0-9]{2}[(01-12)][(01-31)][0-9][A-Z<]{3}[A-Z0-9<]{11}[0-9]){(30)}|([A-Z<]{0,26}[A-Z]{1,3}[(<<)][A-Z]{1,3}[A-Z<]{0,26}<{0,26}){(30)}|([ACIV][A-Z<][A-Z<]{3}([A-Z<]{0,27}[A-Z]{1,3}[(<<)][A-Z]{1,3}[A-Z<]{0,27}){(31)}){(36)}|([A-Z0-9<]{9}[0-9][A-Z<]{3}[0-9]{2}[(01-12)][(01-31)][0-9][MF<][0-9]{2}[(01-12)][(01-31)][0-9][A-Z0-9<]{8}){(36)}|([PV][A-Z<][A-Z<]{3}([A-Z<]{0,35}[A-Z]{1,3}[(<<)][A-Z]{1,3}[A-Z<]{0,35}<{0,35}){(39)}){(44)}|([A-Z0-9<]{9}[0-9][A-Z<]{3}[0-9]{2}[(01-12)][(01-31)][0-9][MF<][0-9]{2}[(01-12)][(01-31)][0-9][A-Z0-9<]{14}[A-Z0-9<]{2}){(44)}",
"MaxThreadCount" : 4,
"Name" : "locr",
"TextureDetectionModes" :[
{
"Mode" : "TDM_GENERAL_WIDTH_CONCENTRATION",
"Sensitivity" : 8
}
],
"ReferenceRegionNameArray" : [ "DRRegion" ]
}
],
"LineSpecificationArray" : [
{
"Name":"L0",
"LineNumber":"",
"BinarizationModes" : [
{
"BlockSizeX" : 30,
"BlockSizeY" : 30,
"Mode" : "BM_LOCAL_BLOCK"
}
]
}
],
"ReferenceRegionArray" : [
{
"Localization" : {
"FirstPoint" : [ 0, 0 ],
"SecondPoint" : [ 100, 0 ],
"ThirdPoint" : [ 100, 100 ],
"FourthPoint" : [ 0, 100 ],
"MeasuredByPercentage" : 1,
"SourceType" : "LST_MANUAL_SPECIFICATION"
},
"Name" : "DRRegion",
"TextAreaNameArray" : [ "DTArea" ]
}
],
"TextAreaArray" : [
{
"LineSpecificationNameArray" : ["L0"],
"Name" : "DTArea",
"FirstPoint" : [ 0, 0 ],
"SecondPoint" : [ 100, 0 ],
"ThirdPoint" : [ 100, 100 ],
"FourthPoint" : [ 0, 100 ]
}
]
}
def get_model_path():
    """Return the path of the ``MRZ.json`` configuration file next to this module.

    On first use the relative ``"model"`` directory recorded in the file is
    replaced with the absolute path of the ``model`` folder beside this module,
    and the file is rewritten in place.

    Any error while reading or rewriting the file is printed and otherwise
    ignored; the path is returned regardless.
    """
    package_dir = os.path.dirname(__file__)
    config_file = os.path.join(package_dir, 'MRZ.json')
    try:
        # open json file
        with open(config_file, 'r+') as f:
            data = json.load(f)
            model_entry = data['CharacterModelArray'][0]
            if model_entry['DirectoryPath'] == 'model':
                model_entry['DirectoryPath'] = os.path.join(package_dir, 'model')

                # write json file
                f.seek(0)  # rewind
                f.write(json.dumps(data))
                # The compact dump can be shorter than the original text;
                # without truncation the leftover tail corrupts the JSON.
                f.truncate()
    except Exception as e:
        print(e)

    return config_file
/* Python object describing one recognized result; every field is itself a
 * Python object. */
typedef struct 
{
PyObject_HEAD
PyObject *confidence; /* NOTE(review): presumably the recognition confidence score - confirm */
PyObject *text; /* recognized text, read as `result.text` by the Python samples */
/* NOTE(review): x1..y4 look like the four corner points of the detected text
 * line - confirm the point order against the code that fills them in. */
PyObject *x1;
PyObject *y1;
PyObject *x2;
PyObject *y2;
PyObject *x3;
PyObject *y3;
PyObject *x4;
PyObject *y4;
} MrzResult;
  1. Set the license key.
# Set the global license key.
mrzscanner.initLicense("your license key")
# Create a DynamsoftMrzReader instance.
scanner = mrzscanner.createInstance()
# Load the MRZ model described by the JSON configuration file.
scanner.loadModel(mrzscanner.get_model_path())
# Recognize MRZ from an image file (pass the image path to decodeFile) ...
results = scanner.decodeFile()
# or
# ... from an OpenCV Mat (pass the Mat to decodeMat).
results = scanner.decodeMat()
# Print the text of every recognized line.
for result in results:
    print(result.text)

Configuring Setup.py File for Building and Packaging Python C Extension

The following code shows how to build the Python C extension with shared libraries for Windows and Linux:

# Name and location of the Dynamsoft Label Recognizer shared library to link
# against; both depend on the host platform.
dbr_lib_dir = ''
dbr_include = ''
dbr_lib_name = 'DynamsoftLabelRecognizer'

# startswith() covers both "linux" and the legacy "linux2" value.
is_linux = sys.platform.startswith("linux")

if is_linux:
    # Linux
    dbr_lib_dir = 'lib/linux'
elif sys.platform == "win32":
    # Windows
    dbr_lib_name = 'DynamsoftLabelRecognizerx64'
    dbr_lib_dir = 'lib/win'

# Build arguments shared by every platform. Defining them unconditionally
# avoids a NameError on platforms (e.g. darwin) that previously reached the
# Extension(...) call without ext_args being set.
ext_args = dict(
    library_dirs=[dbr_lib_dir],
    libraries=[dbr_lib_name],
    include_dirs=['include'],
)

if is_linux:
    # Linux only: compile as C++11 and let the extension find the bundled
    # shared libraries in its own directory at run time.
    ext_args.update(
        extra_compile_args=['-std=c++11'],
        extra_link_args=["-Wl,-rpath=$ORIGIN"],
    )

# Close the README handle deterministically instead of leaking it.
with io.open("README.md", encoding="utf-8") as readme:
    long_description = readme.read()

module_mrzscanner = Extension(
    'mrzscanner', sources=['src/mrzscanner.cpp'], **ext_args)
def copyfiles(src, dst):
    """Copy ``src`` to ``dst`` with ``shutil.copy2``.

    When ``src`` is a directory, every entry directly inside it is copied into
    the directory ``dst``, which is created first if it does not exist yet.
    Without that, each file would be written over a single file named ``dst``.
    When ``src`` is a file it is copied to ``dst`` (a file path or an existing
    directory).
    """
    if os.path.isdir(src):
        os.makedirs(dst, exist_ok=True)
        for name in os.listdir(src):
            shutil.copy2(os.path.join(src, name), dst)
    else:
        shutil.copy2(src, dst)

class CustomBuildExt(build_ext.build_ext):
    """build_ext variant that assembles the ``mrzscanner`` package folder.

    After the regular extension build it gathers, inside
    ``<build_lib>/mrzscanner``: the platform's shared libraries, every file
    produced at the top of ``build_lib`` (moved, not copied), the ``model``
    directory and ``MRZ.json``.
    """

    def run(self):
        build_ext.build_ext.run(self)
        dst = os.path.join(self.build_lib, "mrzscanner")
        # The package folder may not exist yet when build_ext runs on its own.
        os.makedirs(dst, exist_ok=True)
        copyfiles(dbr_lib_dir, dst)

        for file in os.listdir(self.build_lib):
            filePath = os.path.join(self.build_lib, file)
            # Test the full path: the bare name would be resolved against the
            # current working directory, not build_lib.
            if not os.path.isdir(filePath):
                copyfiles(filePath, dst)
                # delete file for wheel package
                os.remove(filePath)

        model_dest = os.path.join(dst, 'model')
        os.makedirs(model_dest, exist_ok=True)

        copyfiles(os.path.join(Path(__file__).parent, 'model'), model_dest)
        shutil.copy2('MRZ.json', dst)

setup(name='mrz-scanner-sdk',
...
cmdclass={
'build_ext': CustomBuildExt},
)
python setup.py build install
python setup.py sdist
pip wheel . --verbose
# Or
python setup.py bdist_wheel

Testing Python MRZ Scanner SDK

  1. Install mrz and opencv-python.
pip install mrz opencv-python
  • mrz is used to extract and check MRZ information from recognized text.
  • opencv-python is used to display the image.
import argparse
import mrzscanner
import cv2
import sys
import numpy as np

from mrz.checker.td1 import TD1CodeChecker
from mrz.checker.td2 import TD2CodeChecker
from mrz.checker.td3 import TD3CodeChecker
from mrz.checker.mrva import MRVACodeChecker
from mrz.checker.mrvb import MRVBCodeChecker

def check(lines):
    """Validate ``lines`` against each supported MRZ format in turn.

    The checkers are tried in the order TD1, TD2, TD3, MRVA, MRVB. The first
    one that accepts the text yields a ``(format_name, fields)`` tuple. A
    checker that raises is skipped. If none accepts the text, the string
    ``'No valid MRZ information found'`` is returned.
    """
    candidates = (
        ("TD1", TD1CodeChecker),
        ("TD2", TD2CodeChecker),
        ("TD3", TD3CodeChecker),
        ("MRVA", MRVACodeChecker),
        ("MRVB", MRVBCodeChecker),
    )

    for label, checker_cls in candidates:
        try:
            checker = checker_cls(lines)
            if bool(checker):
                return label, checker.fields()
        except Exception:
            # This format does not apply; fall through to the next one.
            pass

    return 'No valid MRZ information found'

def scanmrz():
    """
    Command-line script that recognizes MRZ info from a given image.

    Takes one positional argument, the image file name. Prints every
    recognized line followed by the result of check() on the joined lines.
    Any exception is printed and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(description='Scan MRZ info from a given image')
    parser.add_argument('filename')
    args = parser.parse_args()
    try:
        # Only 'filename' is defined on the parser; reading any other
        # attribute (the former `args.ui`) raises AttributeError.
        filename = args.filename

        # Get the license key from https://www.dynamsoft.com/customer/license/trialLicense/?product=dlr
        mrzscanner.initLicense("DLS2eyJoYW5kc2hha2VDb2RlIjoiMjAwMDAxLTE2NDk4Mjk3OTI2MzUiLCJvcmdhbml6YXRpb25JRCI6IjIwMDAwMSIsInNlc3Npb25QYXNzd29yZCI6IndTcGR6Vm05WDJrcEQ5YUoifQ==")

        scanner = mrzscanner.createInstance()
        scanner.loadModel(mrzscanner.get_model_path())
        results = scanner.decodeFile(filename)

        # Accumulator must exist before the first `+=`.
        s = ""
        for result in results:
            print(result.text)
            s += result.text + '\n'

        # Drop the trailing newline before validating.
        print(check(s[:-1]))

    except Exception as err:
        print(err)
        sys.exit(1)

if __name__ == "__main__":
    scanmrz()
python app.py

GitHub Workflow Configuration

We create a new GitHub action workflow as follows:

# Builds wheels on Linux and Windows for every listed Python version, builds a
# source distribution, and publishes everything to PyPI when a tag starting
# with "v" is pushed.
name: Build and upload to PyPI

on: [push, pull_request]

jobs:
  build_wheels:
    name: Build wheels on ${{ matrix.os }}
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, windows-latest]
        python-version: ['3.6', '3.7', '3.8', '3.9', '3.10']

    steps:
      - uses: actions/checkout@v2
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Run test.py in develop mode
        run: |
          python setup.py develop
          python -m pip install opencv-python mrz
          python --version
          python test.py

      # auditwheel writes the repaired wheel to ./wheelhouse by default.
      - name: Build wheels for Linux
        if: matrix.os == 'ubuntu-latest'
        run: |
          pip install -U wheel setuptools auditwheel patchelf
          python setup.py bdist_wheel
          auditwheel repair dist/mrz_scanner_sdk*.whl --plat manylinux2014_$(uname -m)

      - name: Build wheels for Windows
        if: matrix.os == 'windows-latest'
        run: |
          pip install -U wheel setuptools
          python setup.py bdist_wheel -d wheelhouse

      - uses: actions/upload-artifact@v2
        with:
          path: wheelhouse/*.whl

  build_sdist:
    name: Build source distribution
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2

      - name: Build sdist
        run: python setup.py sdist -d dist

      - uses: actions/upload-artifact@v2
        with:
          path: dist/*.tar.gz

  upload_pypi:
    needs: [build_wheels, build_sdist]
    runs-on: ubuntu-latest

    # Publish only for pushed tags that start with "v".
    if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v')
    steps:
      - uses: actions/download-artifact@v2
        with:
          name: artifact
          path: dist

      - uses: pypa/gh-action-pypi-publish@v1.4.2
        with:
          user: __token__
          # NOTE(review): the secret name was lost in the original text; it
          # must match the PyPI token secret configured for the repository.
          password: ${{ secrets.pypi_password }}
          skip_existing: true

Install mrz-scanner-sdk from PyPI

https://pypi.org/project/mrz-scanner-sdk/

pip install mrz-scanner-sdk

Source Code

https://github.com/yushulx/python-mrz-scanner-sdk

--

--

Manager of Dynamsoft Open Source Projects | Tech Lover

Love podcasts or audiobooks? Learn on the go with our new app.

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store
Xiao Ling

Xiao Ling

Manager of Dynamsoft Open Source Projects | Tech Lover