% Workshop paper published in the DAS 2022 proceedings (Springer LNCS series).
% Typed as inproceedings so that both classic BibTeX styles and biblatex pick up booktitle/editor/series.
@inproceedings{88b3bfcb126a44699cdc7a35201d5863,
title = "A Multilingual Approach to Scene Text Visual Question Answering",
abstract = "Scene Text Visual Question Answering (ST-VQA) has recently emerged as a hot research topic in Computer Vision. Current ST-VQA models have a big potential for many types of applications but lack the ability to perform well on more than one language at a time due to the lack of multilingual data, as well as the use of monolingual word embeddings for training. In this work, we explore the possibility to obtain bilingual and multilingual VQA models. In that regard, we use an already established VQA model that uses monolingual word embeddings as part of its pipeline and substitute them by FastText and BPEmb multilingual word embeddings that have been aligned to English. Our experiments demonstrate that it is possible to obtain bilingual and multilingual VQA models with a minimal loss in performance in languages not used during training, as well as a multilingual model trained in multiple languages that match the performance of the respective monolingual baselines.",
keywords = "Deep learning, Multilingual word embeddings, Scene text, Vision and language, Visual question answering",
author = "{Brugu{\'e}s i Pujolr{\`a}s}, Josep and {G{\'o}mez i Bigord{\`a}}, Llu{\'i}s and Karatzas, Dimosthenis",
note = "Funding Information: Acknowledgment. This work has been supported by: Grant PDC2021-121512-I00 funded by MCIN/AEI/10.13039/501100011033 and the European Union NextGenerationEU/PRTR; Project PID2020-116298GB-I00 funded by MCIN/AEI/10.13039/501100011033; Grant PLEC2021-007850 funded by MCIN/AEI/10.13039/501100011033 and the European Union NextGenerationEU/PRTR. Publisher Copyright: {\textcopyright} 2022, Springer Nature Switzerland AG.",
year = "2022",
doi = "10.1007/978-3-031-06555-2_5",
language = "English",
isbn = "9783031065545",
series = "Lecture Notes in Computer Science",
publisher = "Springer Science and Business Media Deutschland GmbH",
pages = "65--79",
editor = "Uchida, Seiichi and Barney, Elisa and Eglin, V{\'e}ronique",
booktitle = "Document Analysis Systems - 15th {IAPR} International Workshop, {DAS} 2022, Proceedings",
}