@inproceedings{1dc99947f3c64cd1b76a26281cdf99ba,
  title     = {Document Collection Visual Question Answering},
  abstract  = {Current tasks and methods in Document Understanding aims to process documents as single elements. However, documents are usually organized in collections (historical records, purchase invoices), that provide context useful for their interpretation. To address this problem, we introduce Document Collection Visual Question Answering (DocCVQA) a new dataset and related task, where questions are posed over a whole collection of document images and the goal is not only to provide the answer to the given question, but also to retrieve the set of documents that contain the information needed to infer the answer. Along with the dataset we propose a new evaluation metric and baselines which provide further insights to the new dataset and task.},
  keywords  = {Document collection, Visual Question Answering},
  author    = {Tito, Rub{\`e}n and Karatzas, Dimosthenis and Valveny, Ernest},
  note      = {Funding Information: Acknowledgements. This work has been supported by the UAB PIF scholarship B18P0070 and the Consolidated Research Group 2017-SGR-1783 from the Research and University Department of the Catalan Government. Publisher Copyright: {\textcopyright} 2021, Springer Nature Switzerland AG.},
  year      = {2021},
  doi       = {10.1007/978-3-030-86331-9_50},
  language  = {English},
  isbn      = {9783030863302},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  pages     = {778--792},
  editor    = {Llad{\'o}s, Josep and Lopresti, Daniel and Uchida, Seiichi},
  booktitle = {Document Analysis and Recognition -- {ICDAR} 2021 -- 16th International Conference, Proceedings},
}