% HTR-Flor conference paper (SIBGRAPI 2020).
% Fields that are not part of the standard @inproceedings data model
% (affiliation, conference-location, conference-year, archive-url, language,
% targetfile, urlaccessdate) are kept as metadata only.
% `url` carries the DOI resolver link; the repository link lives in
% `archive-url` because an entry may define each field name only once.
@InProceedings{SouzaNetoBezeToseLima:2020:DeLeSy,
  author              = {Souza Neto, Arthur Flor de and
                         Bezerra, Byron Leite Dantas and
                         Toselli, Alejandro Hector and
                         Lima, Estanislau Baptista},
  affiliation         = {{Universidade de Pernambuco} and
                         {Universidade de Pernambuco} and
                         {Universitat Politecnica de Valencia} and
                         {Universidade de Pernambuco}},
  title               = {{HTR-Flor}: a deep learning system for offline
                         handwritten text recognition},
  booktitle           = {Proceedings of the 33rd Conference on Graphics,
                         Patterns and Images ({SIBGRAPI})},
  year                = {2020},
  editor              = {Musse, Soraia Raupp and
                         Cesar Junior, Roberto Marcondes and
                         Pelechano, Nuria and
                         Wang, Zhangyang (Atlas)},
  organization        = {Conference on Graphics, Patterns and Images, 33. (SIBGRAPI)},
  publisher           = {IEEE Computer Society},
  address             = {Los Alamitos},
  keywords            = {Handwritten Text Recognition, Gated Convolutional
                         Neural Networks, Gated CNN, Deep Neural Networks},
  abstract            = {In recent years, Handwritten Text Recognition (HTR) has
                         captured a lot of attention among the researchers of the
                         computer vision community. Current state-of-the-art
                         approaches for offline HTR are based on Convolutional
                         Recurrent Neural Networks (CRNNs) excel at scene text
                         recognition. Unfortunately, deep models such as CRNNs,
                         Recurrent Neural Networks (RNNs) are likely to suffer
                         from vanishing/exploding gradient problems when
                         processing long text images, which are commonly found in
                         scanned documents. Besides, they usually have millions
                         of parameters which require huge amount of data, and
                         computational resource. Recently, a new class of neural
                         network architecture, called Gated Convolutional Neural
                         Networks (Gated-CNN), has demonstrated potentials to
                         complement CRNN methods in modeling. Therefore, in this
                         paper, we present a new architecture for HTR, based on
                         Gated-CNN, with fewer parameters and fewer layers, which
                         is able to outperform the current state-of-the-art
                         architectures for HTR. The experiment validates that the
                         proposed model has statistically significant recognition
                         results, surpassing previous HTR systems by an average
                         of 33\% over five important handwritten benchmark
                         datasets. Moreover, the proposed model is able to
                         achieve satisfactory recognition rates even in case of
                         few training data. Finally, its compact architecture
                         requires less computational resources, which can be
                         applied for real-world applications that have hardware
                         limitations, such as robots and smartphones.},
  conference-location = {Porto de Galinhas (virtual)},
  conference-year     = {7-10 Nov. 2020},
  doi                 = {10.1109/SIBGRAPI51738.2020.00016},
  url                 = {http://dx.doi.org/10.1109/SIBGRAPI51738.2020.00016},
  language            = {en},
  archive-url         = {http://sibgrapi.sid.inpe.br/ibi/8JMKD3MGPEW34M/4388QM2},
  targetfile          = {PID6607213.pdf},
  urlaccessdate       = {2025, Apr. 25},
}