<?xml version="1.0" encoding="utf-8"?>
<?xml-model href="https://zfdg.de/sites/default/files/medien/zfdg.rng" type="application/xml" schematypens="http://relaxng.org/ns/structure/1.0"?>
<?xml-model href="https://zfdg.de/sites/default/files/medien/zfdg.rng" type="application/xml" schematypens="http://purl.oclc.org/dsdl/schematron"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0" xmlns:tei="http://www.tei-c.org/ns/1.0">
   <teiHeader>
      <fileDesc>
         <titleStmt>
            <title level="a" type="full">Aiding Provenance Research. A Computer-Assisted Image Retrieval in Auction Catalogs</title>
            <title level="a" type="short">Aiding Provenance Research</title>
            <respStmt>
               <resp ref="http://id.loc.gov/vocabulary/relators/aut">Author</resp>
               <resp ref="https://credit.niso.org/contributor-roles/data-curation/">Data curation</resp>
               <resp ref="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</resp>
               <resp ref="https://credit.niso.org/contributor-roles/investigation/">Investigation</resp>
               <resp ref="https://credit.niso.org/contributor-roles/methodology/">Methodology</resp>
               <resp ref="https://credit.niso.org/contributor-roles/software/">Software</resp>
               <resp ref="https://credit.niso.org/contributor-roles/visualization/">Visualization</resp>
               <resp ref="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing&#160;– original draft</resp>
               <resp ref="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing&#160;– review &amp; editing</resp>
               <persName>
                  <forename>Mathias</forename>
                  <surname>Zinnen</surname>
                  <email>mathias.zinnen@fau.de</email>
                  <idno type="gnd">1393911307</idno>
                  <idno type="orcid">0000-0003-4366-5216</idno>
                  <affiliation>Friedrich-Alexander-Universität Erlangen-Nürnberg</affiliation>
               </persName>
            </respStmt>
            <respStmt>
               <resp ref="http://id.loc.gov/vocabulary/relators/aut">Author</resp>
               <resp ref="https://credit.niso.org/contributor-roles/conceptualization/">Conceptualization</resp>
               <resp ref="https://credit.niso.org/contributor-roles/formal-analysis/">Formal analysis</resp>
               <resp ref="https://credit.niso.org/contributor-roles/investigation/">Investigation</resp>
               <resp ref="https://credit.niso.org/contributor-roles/visualization/">Visualization</resp>
               <resp ref="https://credit.niso.org/contributor-roles/writing-original-draft/">Writing&#160;– original draft</resp>
               <resp ref="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing&#160;– review &amp; editing</resp>
               <persName>
                  <forename>Sabine</forename>
                  <surname>Lang</surname>
                  <email>sab.lang@fau.de</email>
                  <idno type="gnd">1100083812</idno>
                  <idno type="orcid">0000-0003-2543-0085</idno>
                  <affiliation>Friedrich-Alexander-Universität Erlangen-Nürnberg</affiliation>
               </persName>
            </respStmt>
            <respStmt>
               <resp ref="http://id.loc.gov/vocabulary/relators/aut">Author</resp>
               <resp ref="https://credit.niso.org/contributor-roles/supervision/">Supervision</resp>
               <resp ref="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</resp>
               <resp ref="https://credit.niso.org/contributor-roles/resources/">Resources</resp>
               <persName>
                  <forename>Andreas</forename>
                  <surname>Maier</surname>
                  <email>andreas.maier@fau.de</email>
                  <idno type="gnd">138422893</idno>
                  <idno type="orcid">0000-0002-9550-5284</idno>
                  <affiliation>Friedrich-Alexander-Universität Erlangen-Nürnberg</affiliation>
               </persName>
            </respStmt>
            <respStmt>
               <resp ref="http://id.loc.gov/vocabulary/relators/aut">Author</resp>
               <resp ref="https://credit.niso.org/contributor-roles/supervision/">Supervision</resp>
               <resp ref="https://credit.niso.org/contributor-roles/funding-acquisition/">Funding acquisition</resp>
               <resp ref="https://credit.niso.org/contributor-roles/resources/">Resources</resp>
               <resp ref="https://credit.niso.org/contributor-roles/writing-review-editing/">Writing&#160;– review &amp; editing</resp>
               <persName>
                  <forename>Vincent</forename>
                  <surname>Christlein</surname>
                  <email>vincent.christlein@fau.de</email>
                  <idno type="gnd">1107595517</idno>
                  <idno type="orcid">0000-0003-0455-3799</idno>
                  <affiliation>Friedrich-Alexander-Universität Erlangen-Nürnberg</affiliation>
               </persName>
            </respStmt>
         </titleStmt>
         <editionStmt>
            <edition n="1.0"/>
            <respStmt>
               <resp ref="http://id.loc.gov/vocabulary/relators/dtm">Technische Redaktion</resp>
               <persName>
                  <forename>Martin</forename>
                  <surname>de la Iglesia</surname>
                  <idno type="gnd">1095143719</idno>
                  <idno type="orcid">0000-0002-9319-4793</idno>
               </persName>
            </respStmt>
            <respStmt>
               <resp ref="http://id.loc.gov/vocabulary/relators/dtm">Technische Redaktion</resp>
               <resp ref="http://id.loc.gov/vocabulary/relators/pfr">Textredaktion</resp>
               <persName>
                  <forename>Maximilian</forename>
                  <surname>Görmar</surname>
                  <idno type="gnd">1077317964</idno>
                  <idno type="orcid">0000-0003-3608-1140</idno>
               </persName>
            </respStmt>
         </editionStmt>
         <publicationStmt>
            <publisher n="Redaktionssitz">
               <orgName>Herzog August Bibliothek</orgName>
               <address>
                  <addrLine>Lessingplatz 1</addrLine>
                  <addrLine>38304 Wolfenbüttel</addrLine>
               </address>
            </publisher>
            <publisher n="herausgebendes Organ">
               <orgName>Forschungsverbund Marbach Weimar Wolfenbüttel</orgName>
               <address>
                  <addrLine>Burgplatz 4</addrLine>
                  <addrLine>99423 Weimar</addrLine>
               </address>
            </publisher>
            <publisher n="herausgebendes Organ">
               <orgName>Digital Humanities im deutschsprachigen Raum e. V.</orgName>
               <address>
                  <addrLine>Hamburg</addrLine>
               </address>
            </publisher>
            <date n="1.0" when="2026-06-30">30.06.2026</date>
            <idno type="doi">10.17175/sb008_005</idno>
            <idno type="ppn">196699317X</idno>
            <availability status="free">
               <licence target="https://creativecommons.org/licenses/by-sa/4.0/">CC BY-SA 4.0, sofern nicht anders angegeben.</licence>
            </availability>
         </publicationStmt>
         <seriesStmt>
            <title level="j">Zeitschrift für digitale Geisteswissenschaften</title>
            <title level="m">Bildähnlichkeit und Bildsuche: Geistes- und informationswissenschaftliche Zugänge zu historischem Material</title>
            <title level="s">Sonderbände</title>
            
            <respStmt>
               <resp ref="http://id.loc.gov/vocabulary/relators/edt">Editor</resp>
               <persName>
                  <forename>Hartmut</forename>
                  <surname>Beyer</surname>
                  <email>beyer@hab.de</email>
                  <idno type="gnd">136449824</idno>
                  <idno type="orcid">0000-0002-1158-1547</idno>
                  <affiliation>Herzog August Bibliothek Wolfenbüttel</affiliation>
               </persName>
            </respStmt> 
            <respStmt>
               <resp ref="http://id.loc.gov/vocabulary/relators/edt">Editor</resp>
               <persName>
                  <forename>Thomas</forename>
                  <surname>Mandl</surname>
                  <email>mandl@uni-hildesheim.de</email>
                  <idno type="gnd">138432600</idno>
                  <idno type="orcid">0000-0002-8398-9699</idno>
                  <affiliation>Universität Hildesheim, Institut für Informationswissenschaft &amp; Sprachtechnologie</affiliation>
               </persName>
            </respStmt>         
            <idno type="issn">2510-1366</idno>
            <idno type="ppn">1930709293</idno>
            <idno type="doi">10.17175/sb008</idno>
            <idno type="url">https://www.zfdg.de/sonderband/8</idno>     
            <biblScope unit="specialvolume">8</biblScope>
            <biblScope unit="article">5</biblScope>
         </seriesStmt>
         <sourceDesc>
            <p>Born digital: no previous source exists.</p>
         </sourceDesc>
      </fileDesc>
      <encodingDesc>
         <editorialDecl>
            <p>Letzte Überprüfung aller Verweise: <date when="2026-03-30">30.03.2026</date>
            </p>
         </editorialDecl>
         <schemaRef url="https://zfdg.de/sites/default/files/medien/zfdg.odd"/>
      </encodingDesc>
      <profileDesc>
         <textClass>
            <keywords n="Beitragstyp">
               <term>Fachartikel</term>
            </keywords>
            <keywords n="GND">
               <term ref="https://d-nb.info/gnd/4125859-9">Auktion</term>
               <term ref="https://d-nb.info/gnd/4006684-8">Bildverarbeitung</term>
               <term ref="https://d-nb.info/gnd/4138803-3">Kunstgeschichte</term>
               <term ref="https://d-nb.info/gnd/4193754-5">Maschinelles Lernen</term>
               <term ref="https://d-nb.info/gnd/7697228-8">Provenienzforschung</term>
            </keywords>
         </textClass>
      </profileDesc>
   </teiHeader>
   <text xml:lang="en">
      <front>
         
         <div type="abstract" xml:lang="en">
            <p>Provenance research examines the origin of objects and aims to reconstruct their ownership history. For this purpose, researchers increasingly use online resources such as the database <title>German Sales</title>, which contains auction and sales catalogs, through a full text search. While this is very valuable, a text-based search has limitations due to missing or varying information. This motivated us to search for a different, image-based method which enables searches for identical and similar images in large data collections using neural networks. We provide qualitative and quantitative results showing the effectiveness of our method and discuss the technical conceptualization of image similarity underlying the presented algorithm.</p>
         </div>
         <div type="abstract" xml:lang="de">
            <p>Provenienzforschung untersucht die Herkunft von Objekten und deren Besitzgeschichte. Dafür verwenden Forscher*innen Online-Datenbanken wie <title>German Sales</title>, die Auktions- und Verkaufskataloge enthält, die im Volltext durchsuchbar sind. Obwohl dieser Zugang wertvoll ist, limitieren fehlende oder abweichende Informationen den Erfolg einer textbasierten Suche. Dies hat uns motiviert, eine bildbasierte Methode zu entwickeln, die Suchen nach identischen und ähnlichen Bildern in Datensammlungen ermöglicht und auf neuronalen Netzen basiert. Wir präsentieren qualitative und quantitative Ergebnisse, die die Wirksamkeit der Methode zeigen, und diskutieren die technische Konzeptualisierung der Bildähnlichkeit, die dem Algorithmus zugrunde liegt.</p>
         </div>
      </front>
      <body>
         <div type="chapter">
            <head>1. Introduction</head>
            <p>Provenance research examines the origin and history of objects, ideally tracing them from their creation to the present day. It seeks information about previous owners and the circumstances surrounding the transfer of ownership, and is a core task in museums, libraries, archives, and other cultural institutions.<note type="footnote"> Cf. <ref type="bibliography" target="#zuschlag_einfuehrung_2022">Zuschlag 2022</ref>, pp. 11–12.</note> In the German context, it encompasses objects looted by the National Socialists, property from colonial contexts, the German Democratic Republic (GDR) and Soviet Occupation Zone, or general war-related losses. To find out about an object’s provenance, researchers study the (physical) object, explore historical contexts related to persons and institutions, and utilize archival documents, literature and online resources such as <ref target="https://digi.ub.uni-heidelberg.de/germansales/">German Sales</ref>.<note type="footnote"> Cf. <ref type="bibliography" target="#glaf_provenance_2020">German Lost Art Foundation et&#160;al. 2020</ref>, p. 6 and table of contents chapter 3.</note>
            </p>
            <p>The <title>German Sales</title>-database provides access to circa 15,000 auction and sales catalogs primarily from German-speaking countries, making it an essential source for studying the art market and collecting practices in the 20<hi rend="super">th</hi> century and for provenance research. All catalogs in <title>German Sales</title> are open access and searchable via a text entry in the full text.<note type="footnote"> Cf. <ref type="bibliography" target="#germansales_2025">German Sales 2025</ref>.</note> Despite the significant utility of a full-text search, it has its limitations: Missing information, varying titles, dates, or attributions for the same object might lead to unreliable or incomplete results. The following example illustrates this. A painting by the German artist Ludwig von Zumbusch (1861–1927) was titled <title>Römische Ideallandschaft</title> in an auction catalog in 1931<note type="footnote"> Cf. <ref type="bibliography" target="#ahh_auction_1931">Helbing 1931</ref>. </note>, appeared as <title>Landschaft mit Birken und Pappeln</title> in 1933<note type="footnote"> Cf. <ref type="bibliography" target="#ikua_auction_1933">Internationales Kunst- und Auktionshaus 1933</ref>.</note>, and was eventually offered as <title>Einsames Land</title> in 2019<note type="footnote"> Cf. <ref type="bibliography" target="#kettererkunst_auction_2019">Ketterer Kunst 2019</ref>.</note> (see <ref type="graphic" target="#aiding_provenance_001">Fig.&#160;1</ref>). </p>
            <figure>
               <graphic xml:id="aiding_provenance_001" url="Medien/aiding_provenance_001.png">
                  <desc>
                     <ref type="intern" target="#abb1">Figure&#160;1</ref>: A painting by Ludwig von Zumbusch was offered at three auctions, each time under a different title. [Graphic: Mathias Zinnen&#160;/ Sabine Lang 2024, image source: <ref type="bibliography" target="#kettererkunst_auction_2019">Ketterer Kunst 2019</ref>]</desc>
               </graphic>
            </figure>
            <p>A search with the current title would have yielded no direct results. This motivated us to look for a different method. What if, instead of text, we use computational methods to perform an image-based search? To this end, we developed a process which automatically detects and crops images from auction catalogs, extracts quantifiable feature vectors, and stores them in a feature database. Additionally, we created an application to search for images in the processed auction catalogs. Users can upload arbitrary images to the application and are provided with the most similar images in the database according to the extracted feature vectors. This system can be used to detect different depictions of identical objects<note type="footnote"> When we speak of objects in the following, we primarily refer to paintings, graphics or sculptures. However, since everyday objects etc. are also depicted in the auction catalogs, we use the more inclusive term ›object‹.</note> or to explore similar images. The capability to explore similar images positions our work within a tradition of using <term type="dh">computer vision</term> for the analysis of historical art works to complement traditional humanities methodologies with new data-driven approaches.<note type="footnote"> Cf. <ref type="bibliography" target="#crowley_zisserman_art_2016">Crowley&#160;/ Zisserman 2016</ref>; <ref type="bibliography" target="#ufer_et_al_retrieval_2021">Ufer et&#160;al. 2021</ref>.</note>
            </p>
            <p>In the following sections, we first introduce related works and subsequently the dataset at hand. We will then present our methodology for image search in auction catalogs, followed by experiments and case studies. As the central topic of this special issue is image similarity, we will discuss the technical conceptualization of image similarity underlying the presented algorithm and contrast it with a human perspective. We conclude this article by summarizing the main findings and pointing to future work. </p>
         </div>
         <div type="chapter">
            <head>2. Related Work</head>
            <p>This work depends on two established computer vision techniques: object detection, which is required for the initial cropping of the images from the auction catalogs, and image retrieval, essential for computing image similarities based on extracted feature vectors. Conceptually, our method is anchored in recent and innovative approaches which apply digital methods to study the art market and support provenance research. We will first explore the technical underpinnings of our methodology before contextualizing our approach within the broader fields of digital art market analysis and provenance studies.</p>
            <div type="subchapter">
               <head>2.1 Object Detection</head>
               <p>Object Detection is one of the fundamental tasks in computer vision and maintains its prominence as one of the most active research areas.<note type="footnote"> Cf. <ref type="bibliography" target="#zou_et_al_object_object_2023">Zou et&#160;al. 2023</ref>.</note> It goes beyond image-level classification by requiring not only the classification but also the localization of objects within an image. Image-level classification, in contrast, involves assigning a label to an entire image, which generally poses less complexity. Classification benchmarks get their complexity from the number of categories that need to be distinguished, as exemplified by the classic <ref target="https://www.image-net.org/">ImageNet</ref><note type="footnote"> Cf. <ref type="bibliography" target="#russakovsky_et_al_imagenet_2015">Russakovsky et&#160;al. 2015</ref>.</note> benchmark, which required differentiation among 1,000, or in some versions, even 21,000 classes.<note type="footnote"> Cf. <ref type="bibliography" target="#ridnik_et_al_imagenet_2021">Ridnik et&#160;al. 2021</ref>.</note>
               </p>
               <p>In detection, two-stage methods like the <term type="dh">Region-based Convolutional Neural Network (R-CNN)</term> family<note type="footnote"> Cf. <ref type="bibliography" target="#girshick_et_al_hierarchies_2014">Girshick et&#160;al. 2014</ref>; <ref type="bibliography" target="#girshick_rcnn_2015">Girshick 2015</ref>; <ref type="bibliography" target="#ren_et_al_rcnn_2016">Ren et&#160;al. 2016</ref>.</note> have dominated the field, offering strong accuracy at the cost of increased runtime. In contrast, one-stage methods like <term type="dh">You only look once (YOLO)</term><note type="footnote"> Cf. <ref type="bibliography" target="#redmon_et_al_you_2016">Redmon et&#160;al. 2016</ref>.</note> or <term type="dh">Single Shot MultiBox Detector (SSD)</term><note type="footnote"> Cf. <ref type="bibliography" target="#liu_et_al_ssd_2016">Liu et&#160;al. 2016</ref>.</note> have traditionally prioritized speed over precision, although the performance gap has narrowed with more recent architectures. Recently, transformer-based detection architectures, particularly those based on the <ref target="https://huggingface.co/docs/transformers/model_doc/detr">DEtection TRansformer (DETR)</ref>,<note type="footnote"> Cf. <ref type="bibliography" target="#carion_et_al_endtoend_2020">Carion et&#160;al. 2020</ref>.</note> have become the dominant paradigm. However, one-stage and two-stage methods maintain their relevance. Notably, recent versions of YOLO continue to show competitive performances on many detection benchmarks and are very popular due to their easy usage, efficient training and inference times. For our purposes, the complexity and performance of modern detection benchmarks far exceed our demands. The task of detecting images in printed layouts is straightforward and can nearly be considered as a resolved problem within the field of computer vision. 
Consequently, we favor the <ref target="https://huggingface.co/Ultralytics/YOLOv8">YOLO-v8</ref><note type="footnote"> Cf. <ref type="bibliography" target="#jocher_et_al_ultralytics_2023">Jocher et&#160;al. 2023</ref>.</note> architecture over more powerful but slower modern two-stage or DETR-based architectures. </p>
               <p>While applying object detection to artworks is less common than in natural image analysis, it is recognized as a well-established practice within digital humanities. Pioneering work in object detection for artworks was conducted by Elliot J. Crowley and Andrew Zisserman. They trained detection algorithms on the photographic dataset <ref target="https://www.robots.ox.ac.uk/~vgg/projects/pascal/VOC/">Pascal VOC</ref><note type="footnote"> Cf. <ref type="bibliography" target="#everingham_et_al_pascal_2010">Everingham et&#160;al. 2010</ref>.</note> and employed transfer learning to apply the trained algorithms for identifying objects such as vehicles or animals in artworks.<note type="footnote"> Cf. <ref type="bibliography" target="#crowley_zisserman_state_2014">Crowley&#160;/ Zisserman 2014</ref>; <ref type="bibliography" target="#crowley_zisserman_search_2015">Crowley&#160;/ Zisserman 2015</ref>; <ref type="bibliography" target="#crowley_zisserman_art_2016">Crowley&#160;/ Zisserman 2016</ref>.</note> Another line of research focused on the detection of smell-related objects in artworks,<note type="footnote"> Cf. <ref type="bibliography" target="#zinnen_et_al_learning_2022a">Zinnen et&#160;al. 2022a</ref>.</note> a topic addressed with the introduction of the <ref target="https://odor-challenge.github.io/2022/">ODOR</ref> challenge and dataset. Special cases of object detection, such as the detection of depicted faces<note type="footnote"> Cf. <ref type="bibliography" target="#bengamra_et_al_challenges_2021">Bengamra et&#160;al. 2021</ref>; <ref type="bibliography" target="#mermet_et_al_face_2020">Mermet et&#160;al. 2020</ref>.</note> and persons<note type="footnote"> Cf. <ref type="bibliography" target="#westlake_et_al_people_2016">Westlake et&#160;al. 2016</ref>.</note> have a great relevance for art history, e.g. for studying portraiture, figure painting, or genre painting. Westlake et&#160;al. 
introduced a dataset of annotated persons depicted across a wide range of artistic styles.<note type="footnote"> Cf. <ref type="bibliography" target="#westlake_et_al_people_2016">Westlake et&#160;al. 2016</ref>.</note> Closely related to that is the application of <term type="dh">pose estimation</term>, which typically requires an initial person detection stage. Pose estimation was used to analyze and cluster artworks based on body postures,<note type="footnote"> Cf. <ref type="bibliography" target="#impett_moretti_totentanz_2017">Impett&#160;/ Moretti 2017</ref>; <ref type="bibliography" target="#impett_suesstrunk_pose_2016">Impett&#160;/ Süsstrunk 2016</ref>; <ref type="bibliography" target="#bell_impett_ikonographie_2019">Bell&#160;/ Impett 2019</ref>; <ref type="bibliography" target="#impett_gesture_2020">Impett 2020</ref>.</note> hand gestures,<note type="footnote"> Cf. <ref type="bibliography" target="#bernasconi_et_al_2023">Bernasconi et&#160;al. 2023</ref>. </note> and overall image composition,<note type="footnote"> Cf. <ref type="bibliography" target="#madhu_et_al_structures_2020">Madhu et&#160;al. 2020</ref>; <ref type="bibliography" target="#madhu_et_al_icc_2023">Madhu et&#160;al. 2023</ref>.</note> or to recognize smell-related<note type="footnote"> Cf. <ref type="bibliography" target="#zinnen_et_al_sniffyart_2023">Zinnen et&#160;al. 2023</ref>.</note> and sensory<note type="footnote"> Cf. <ref type="bibliography" target="#zinnen_et_al_gestures_2025">Zinnen et&#160;al. 2025</ref>.</note> gestures.</p>
               <p>A common challenge for object detection in artworks is the sparsity of annotated datasets. Gonthier et&#160;al. addressed this by employing a weakly supervised training approach, using image-level labels before assessing the method on instance-level labels.<note type="footnote"> Cf. <ref type="bibliography" target="#gonthier_et_al_object_2018">Gonthier et&#160;al. 2018</ref>; <ref type="bibliography" target="#gonthier_et_al_instance_2022">Gonthier et&#160;al. 2022</ref>.</note> Additionally, they shifted their focus from detecting modern categories to identifying objects with art historical relevance. This thread was picked up by Marinescu et&#160;al., who adapted <ref target="https://cocodataset.org/#home">COCO</ref> categories to be consistent with historical contexts,<note type="footnote"> Cf. <ref type="bibliography" target="#marinescu_et_al_object_2020">Marinescu et&#160;al. 2020</ref>. </note> and by Reshetnikov et&#160;al., who compiled a large dataset categorized by art historical themes.<note type="footnote"> Cf. <ref type="bibliography" target="#reshetnikov_et_al_deart_2022">Reshetnikov et&#160;al. 2023</ref>.</note>
               </p>
               <p>Another approach to address the challenge of data sparsity is the use of style transfer to mimic artistic object representations.<note type="footnote"> Cf. <ref type="bibliography" target="#sanakoyeu_et_al_content_2018">Sanakoyeu et&#160;al. 2018</ref>; <ref type="bibliography" target="#gatys_et_al_algorithm_2015">Gatys et&#160;al. 2015</ref>; <ref type="bibliography" target="#gatys_et_al_image_2016">Gatys et&#160;al. 2016</ref>. </note> This technique leverages existing annotations from photographic data to obtain an adaptation to the artistic target domain using transfer learning. This strategy is commonly employed for various tasks, including pose estimation,<note type="footnote"> Cf. <ref type="bibliography" target="#madhu_et_al_icc_2023">Madhu et&#160;al. 2023</ref>; <ref type="bibliography" target="#springstein_et_al_pose_2022">Springstein et&#160;al. 2022</ref>.</note> emotion recognition,<note type="footnote"> Cf. <ref type="bibliography" target="#patoliya_et_al_smell_2024">Patoliya et&#160;al. 2024</ref>.</note> or painting captioning,<note type="footnote"> Cf. <ref type="bibliography" target="#lu_et_al_image_2021">Lu et&#160;al. 2021</ref>.</note> and has also been successfully applied to object detection.<note type="footnote"> Cf. <ref type="bibliography" target="#jeon_et_al_object_2020">Jeon et&#160;al. 2020</ref>; <ref type="bibliography" target="#kadish_et_al_improving_2021">Kadish et&#160;al. 2021</ref>; <ref type="bibliography" target="#smirnov_eguizabal_learning_2018">Smirnov&#160;/ Eguizabal 2018</ref>.</note>
               </p>
               <p>The issue of data sparsity, at least for person detection and pose estimation, has been significantly mitigated with the introduction of the <ref target="https://github.com/idea-research/humanart">Human-Art</ref> dataset.<note type="footnote"> Cf. <ref type="bibliography" target="#ju_et_al_humanart_2023">Ju et&#160;al. 2023</ref>.</note> This dataset features 50,000 images of artistic creations like sculptures, paintings, or cartoons annotated with the position of persons and pose estimation keypoints.</p>
            </div>
            <div type="subchapter">
               <head>2.2 Image Retrieval</head>
               <p>The retrieval of similar depictions of identical objects from auction catalogs can be contextualized within several closely related research areas, with <term type="dh">Content-Based Image Retrieval (CBIR)</term> being the most prominent. CBIR, often used synonymously with ›Image Retrieval‹, can be defined as the problem of searching for relevant images in a database given a query image based on visual features.<note type="footnote"> Cf. <ref type="bibliography" target="#chen_et_al_learning_2022">Chen et&#160;al. 2022</ref>. </note>
               </p>
               <p>The retrieval process can also be framed as a problem of <term type="dh">Near Duplicate Detection (NDD)</term> when considering different depictions of the same object as near duplicates. Primarily aimed at identifying digital copies or manipulated images, NDD differs from CBIR in its application focus. While CBIR is a relatively open task that accommodates a wide range of queries and image similarities, NDD specifically targets the duplicate nature of the query and target images.<note type="footnote"> Cf. <ref type="bibliography" target="#thyagharajan_kalaiarsi_review_2021">Thyagharajan&#160;/ Kalaiarasi 2021</ref>.</note> Irrespective of whether it is labeled as NDD or CBIR, a retrieval system typically operates in two phases: A preparatory phase, where a database of quantifiable image representations is created, and an online phase, where this database is queried.<note type="footnote"> Cf. <ref type="bibliography" target="#zhou_et_al_advance_2017">Zhou et&#160;al. 2017</ref>.</note> These queries are not limited to image inputs alone; for example, a shared image-language embedding space, as provided by <term type="dh">Contrastive Language-Image Pre-Training</term> (<ref target="https://openai.com/index/clip/">CLIP</ref>),<note type="footnote"> Cf. <ref type="bibliography" target="#radford_et_al_models_2021">Radford et&#160;al. 2021</ref>.</note> enables cross-modal querying.<note type="footnote"> Cf. <ref type="bibliography" target="#garcia_vogiatzis_paintings_2018">Garcia&#160;/ Vogiatzis 2018</ref>.</note> However, in the context of our application, we are focusing on image queries. In this scenario, the online phase entails a feature extraction similar to that during the database creation. This process maps the query input into the same embedding space as the already processed image corpus, enabling the computation of distances between the query features and the previously extracted features to assess their similarity. 
As computing the feature distance across all stored feature vectors can become intractable for large image corpora, the embedding space is often clustered or indexed to enable efficient querying. Chen et&#160;al. categorize methods based on their reliance on off-the-shelf models, originally trained for different tasks than retrieval, versus methods that incorporate an additional training stage dedicated to fine-tuning the models specifically for image retrieval.<note type="footnote"> Cf. <ref type="bibliography" target="#chen_et_al_learning_2022">Chen et&#160;al. 2022</ref>.</note> In our application, we employ the simpler approach by using pre-trained models to extract features. Moving forward, we plan to improve our system by integrating more elaborate training schemes specifically designed for retrieval tasks.</p>
               <p>The process of fine-tuning models for retrieval is closely related to the field of metric learning. However, the term <term type="dh">metric learning</term> is often discussed more from a technical standpoint than from its practical application. Metric learning involves mapping data into an embedding space where similar data points are closer together and dissimilar data points are further apart, guided by a distance metric such as the <term type="dh">Euclidean norm</term>.<note type="footnote"> Cf. <ref type="bibliography" target="#musgrave_et_al_learning_2020">Musgrave et&#160;al. 2020</ref>.</note> Metric learning can be used for many purposes such as classifying samples according to their nearest neighbors in the embedding space or in self-supervised pre-training. However, retrieval can be considered its most natural application as it typically entails the search for the closest query result in the feature space. This way, retrieval can be framed as an instance-level open-set classification, where the query has to be matched to its closest neighbors in the feature space. </p>
               <p>Independent of whether models are specifically trained for retrieval is the consideration of which features are used for the distance computation. Before <term type="dh">deep learning</term>, approaches typically relied on local features obtained with methods such as <term type="dh">Scale-Invariant Feature Transform (SIFT)</term>.<note type="footnote"> Cf. <ref type="bibliography" target="#lowe_image_2004">Lowe 2004</ref>.</note> These local features can be aggregated into a global descriptor using methods like <term type="dh">Bag-of-visual-Words (BoW)</term>.<note type="footnote"> Cf. <ref type="bibliography" target="#csurka_et_al_categorization_2004">Csurka et&#160;al. 2004</ref>.</note> Such combinations of classical local features and their global aggregation have been applied in various contexts, including the recognition of CD-covers,<note type="footnote"> Cf. <ref type="bibliography" target="#nister_stewenius_recognition_2006">Nistér&#160;/ Stewénius 2006</ref>.</note> object retrieval<note type="footnote"> Cf. <ref type="bibliography" target="#philbin_et_al_quantization_2008">Philbin et&#160;al. 2008</ref>.</note> and product search.<note type="footnote"> Cf. <ref type="bibliography" target="#he_et_al_product_2012">He et&#160;al. 2012</ref>.</note>
               </p>
               <p>The question of feature aggregation remains relevant in modern, deep-learning-based approaches: Even when employing neural networks, various feature representations can be considered, involving different scales<note type="footnote"> For example <ref type="bibliography" target="#sun_et_al_object_2015">Sun et&#160;al. 2015</ref> or <ref type="bibliography" target="#tolias_et_al_object_2015">Tolias et&#160;al. 2015</ref>.</note> (intra-model) or different models<note type="footnote"> Cf. <ref type="bibliography" target="#yokoo_et_al_re-ranking_2020">Yokoo et&#160;al. 2020</ref>.</note> (inter-model). Effective fusion of different feature levels can lead to a more meaningful feature representation.<note type="footnote"> Cf. <ref type="bibliography" target="#chen_et_al_learning_2022">Chen et&#160;al. 2022</ref>.</note> For our application, a comparatively simple strategy is sufficient. We simply use the flattened output of the last layer of a pre-trained <term type="dh">Convolutional Neural Network (CNN)</term> to represent the image contents and compute similarities. Future research could explore different scales and more elaborate feature fusion, but also revisit classical feature extraction methods such as SIFT, and analyze their impact on the type of similarity encoded in the query results.</p>
               <p>In the fields of digital humanities and computational cultural heritage, research formerly centered around the visual retrieval of similar images or image parts to identify patterns across numerous artworks and uncover relations between them.<note type="footnote"> Cf. <ref type="bibliography" target="#seguin_et_al_link_2016">Seguin et&#160;al. 2016</ref>; <ref type="bibliography" target="#ufer_et_al_retrieval_2021">Ufer et&#160;al. 2021</ref>; <ref type="bibliography" target="#shen_et_al_patterns_2019">Shen et&#160;al. 2019</ref>; <ref type="bibliography" target="#castellano_et_al_link_2021">Castellano et&#160;al. 2021</ref>.</note> Eventually, projects and works also focused on the development of interfaces to search for images or image parts.<note type="footnote"> Cf. <ref type="bibliography" target="#ufer_et_al_retrieval_2021">Ufer et&#160;al. 2021</ref>; <ref type="bibliography" target="#springstein_et_al_iart_2021">Springstein et&#160;al. 2021</ref>; <ref type="bibliography" target="#offert_bell_imgsai_2024">Offert&#160;/ Bell 2024</ref>; <ref type="bibliography" target="#portapp_2025">PortApp 2025</ref>.</note> Besides feature vector distances, works also explored the application of different metrics to capture diverse aspects of image similarity, for example color concepts,<note type="footnote"> Cf. <ref type="bibliography" target="#yelizaveta_et_al_analysis_2005">Yelizaveta et&#160;al. 2005</ref>.</note> image composition,<note type="footnote"> Cf. <ref type="bibliography" target="#madhu_et_al_icc_2023">Madhu et&#160;al. 2023</ref>.</note> body posture,<note type="footnote"> Cf. <ref type="bibliography" target="#impett_moretti_totentanz_2017">Impett&#160;/ Moretti 2017</ref>; <ref type="bibliography" target="#bell_impett_ikonographie_2019">Bell&#160;/ Impett 2019</ref>.</note> or even symbolic meaning.<note type="footnote"> Cf. <ref type="bibliography" target="#sartini_et_al_icon_2023">Sartini et&#160;al. 
2023</ref>; <ref type="bibliography" target="#sartini_gangemi_symbolism_2021">Sartini&#160;/ Gangemi 2021</ref>.</note>
               </p>
               <p>The wealth of works which apply computational methods for art analysis and understanding is reflected in several review papers. While Bengamra et&#160;al.<note type="footnote"> Cf. <ref type="bibliography" target="#bengamra_et_al_survey_2024">Bengamra et&#160;al. 2024</ref>.</note> provide a summary of significant computer vision applications for art, they specifically focus on object detection. An overview of datasets and works on the task of recognizing and extracting patterns in visual arts using deep learning is given in a paper by Giovanna Castellano and Gennaro Vessio.<note type="footnote"> Cf. <ref type="bibliography" target="#castellano_vessio_overview_2021">Castellano&#160;/ Vessio 2021</ref>.</note> Another review provides an overview of how computational methods are used for classification, object detection, similarity retrieval or multimodal representations, among others.<note type="footnote"> Cf. <ref type="bibliography" target="#cetinic_she_understanding_2022">Cetinic&#160;/ She 2022</ref>. </note> Amalia Foka then presented past computer vision applications for art historical research and future possibilities.<note type="footnote"> Cf. <ref type="bibliography" target="#foka_computer_2021">Foka 2021</ref>.</note>
               </p>
            </div>
            <div type="subchapter">
               <head>2.3 Digital Art Market &amp; Provenance Studies</head>
               <p>Computational methods have also been applied to art market research: Utilizing a subset of over 267,000 sale transactions from the <ref target="https://www.getty.edu/research/provenance/">Getty Provenance Index</ref> and complex network science, Schich et&#160;al. studied the history of the art market and collection dynamics to reveal social, temporal, and spatial networks.<note type="footnote"> Cf. <ref type="bibliography" target="#schich_et_al_network_2017">Schich et&#160;al. 2017</ref>.</note> Fletcher et&#160;al. studied the art market in London between 1850 and 1914 on the basis of complementary datasets and visualizations.<note type="footnote"> Cf. <ref type="bibliography" target="#fletcher_et_al_mapping_2012">Fletcher et&#160;al. 2012</ref>.</note> Scheithauer et&#160;al. suggested a two-step pipeline to analyze the layout and content of auction sales catalogs utilizing object detection and text sequence labeling models.<note type="footnote"> Cf. <ref type="bibliography" target="#scheithauer_et_al_auction_2024">Scheithauer et&#160;al. 2024</ref>.</note> An approach and goal similar to the ones presented in this paper were announced in 2021, when the Fraunhofer Institute published a report on a feasibility study. The study developed AI-methods for image search in auction catalogs, enabling a successful comparison between current and historical images.<note type="footnote"> Cf. <ref type="bibliography" target="#vicente-garcia_ersten_2021">Vicente-Garcia 2021</ref>.</note> A second feasibility study, which again highlighted the success of the methods, was published in the magazine <title>Museumskunde</title> in 2024.<note type="footnote"> Cf. <ref type="bibliography" target="#vicente-garcia_suche_2024">Vicente-Garcia 2024</ref>. </note> In the humanities, the art market has been a research topic for many years. 
Numerous publications on the art market between 1901 and 1945 in Germany and other German-speaking countries focus on individual art dealers<note type="footnote"> Cf. <ref type="bibliography" target="#hoffmann_kuhn_kunsthaendler_2016">Hoffmann&#160;/ Kuhn 2016</ref>.</note>, auction houses<note type="footnote"> Cf. <ref type="bibliography" target="#hopp_kunsthandel_2012">Hopp 2012</ref>.</note>, or the valuation and price development.<note type="footnote"> Cf. <ref type="bibliography" target="#jeuthe_kunstwerte_2014">Jeuthe 2014</ref>. </note> An overview of relevant literature is given in the bibliographies <title>German Sales 1901–1929</title>
                  <note type="footnote"> Cf. <ref type="bibliography" target="#bommert_sales_2019">Bommert 2019</ref>.</note> and <title>German Sales 1930–1945</title>.<note type="footnote"> Cf. <ref type="bibliography" target="#baehr_sales_2013">Bähr 2013</ref>.</note>
               </p>
               <p>The relatively new field of digital provenance research studies the impact of digitality on provenance research, focusing on chances and challenges, and uses digital methods for analyzing provenance data.<note type="footnote"> Cf. <ref type="bibliography" target="#lang_provenienzforschung_2023a">Lang 2023a</ref>. </note> Works report on the development of research databases<note type="footnote"> Cf. <ref type="bibliography" target="#werner_proveana_2020">Werner 2020</ref>. </note>, the presentation and communication of provenance information and research results online<note type="footnote"> Cf. <ref type="bibliography" target="#haffner_provenienzen_2020">Haffner 2020</ref>; <ref type="bibliography" target="#haffner_provenienzforschung_2019">Haffner 2019</ref>. </note>, and the future of provenance research and digital infrastructures in Germany, especially focusing on tendencies and desiderata.<note type="footnote"> Cf. <ref type="bibliography" target="#hopp_provenienzrecherche_2018">Hopp 2018</ref>. </note> Special attention has been paid to aspects of incompleteness and vagueness in provenance research.<note type="footnote"> Cf. <ref type="bibliography" target="#lang_gap_2023b">Lang 2023b</ref>; <ref type="bibliography" target="#mariani_paalen_2022">Mariani 2022</ref>. </note> Rother et&#160;al. study the transformation of unstructured provenance records into <term type="dh">Linked Open Data</term> and how computer-based methods can be utilized for a comprehensive analysis of provenance records.<note type="footnote"> Cf. <ref type="bibliography" target="#rother_et_al_value_2023">Rother et&#160;al. 2023</ref>; <ref type="bibliography" target="#rother_et_al_care_2022">Rother et&#160;al. 2022</ref>. </note>
               </p>               
            </div>
         </div>         
         <div type="chapter">
            <head>3. German Sales</head>
            <p>The database <title>German Sales</title> was launched in 2013 and currently holds circa 15,000 digitized sales and auction catalogs mostly from German-speaking countries and bibliographic metadata. Various projects contributed to the development of <title>German Sales</title>. The initial project started in 2010 with a collaboration between the Getty Research Institute, Heidelberg University Library, and the Art Library in Berlin. Back then, the aim was to provide (online) access to auction catalogs held and preserved throughout Germany, Austria, and Switzerland for the period from 1930 to 1945.<note type="footnote"> Cf. <ref type="bibliography" target="#huemer_sales_2014">Huemer 2014</ref>, pp.&#160;273–278.</note> Subsequent sub-projects focused on different time periods or locations and included sources relevant not only to the secondary but also to the primary market, such as gallery catalogs or stock books.<note type="footnote"> Cf. <ref type="bibliography" target="#germansales_project_2024">German Sales: Project description 2024</ref>.</note> All catalogs are published in open access and accessible through a full text search. The database itself offers different views on the data (see <ref type="graphic" target="#aiding_provenance_002">Fig.&#160;2</ref>): Users can activate and deactivate the metadata block, overview, facsimile, or OCR full text depending on their preferences and interests. The catalogs are very heterogeneous in content and form: They vary in length, layout, and with regard to the information they contain. Most catalogs include a cover page, an introduction, and illustrations of the included lots. These illustrations vary in size, rotation and quality and are embedded within the text or printed on separate pages at the end of the catalogs. 
All catalogs contain, to varying extents, a list of the lots with information about the title, artist, date, measurements, technique, and (sometimes) a description (see <ref type="graphic" target="#aiding_provenance_003">Fig.&#160;3</ref>). </p>
            <figure>
               <graphic xml:id="aiding_provenance_002" url="Medien/aiding_provenance_002.png">
                  <desc>
                     <ref type="intern" target="#abb2">Figure&#160;2</ref>: <title>German Sales</title> offers different views of the data. Users can choose between the metadata, overview, facsimile, or OCR full text. [Screenshot: Mathias Zinnen&#160;/ Sabine Lang 2024]</desc>
               </graphic>
            </figure>
            <figure>
               <graphic xml:id="aiding_provenance_003" url="Medien/aiding_provenance_003.png">
                  <desc>
                     <ref type="intern" target="#abb3">Figure&#160;3</ref>: Exemplary pages from different catalogs showing their heterogeneity regarding, for example, the number of images per page, the variety of offered lots, color of the pages and placement within the catalogs. [Graphic: Mathias Zinnen&#160;/ Sabine Lang 2024, image source: <ref type="bibliography" target="#germansales_2025">German Sales 2025</ref>]</desc>
               </graphic>
            </figure>
            <p>The lots themselves are also very heterogeneous and include paintings, drawings, prints, sculptures, books, ceramics, furniture, and other objects. <ref type="graphic" target="#aiding_provenance_004">Fig.&#160;4</ref> visualizes the types of offered objects and their frequency as a word cloud, created using the keywords provided for each catalog in the bibliographies of Astrid Bähr and Britta Bommert.<note type="footnote">Cf. <ref type="bibliography" target="#baehr_sales_2013">Bähr 2013</ref>; <ref type="bibliography" target="#bommert_sales_2019">Bommert 2019</ref>. </note> While we focus on paintings in this work, the heterogeneity of object types provides an excellent opportunity to extend our approach to different kinds of auction objects in the future.</p>
            <figure>
               <graphic xml:id="aiding_provenance_004" url="Medien/aiding_provenance_004.png">
                  <desc>
                     <ref type="intern" target="#abb4">Figure&#160;4</ref>: Word cloud visualizing the frequency of sales&#160;/ object types found in the German Sales bibliographies. [Visualization: Mathias Zinnen&#160;/ Sabine Lang 2024]</desc>
               </graphic>
            </figure>
         </div>
         <div type="chapter">
            <head>4. Methodology</head>
            <p>In the following sections we present our methodology which enables searches for identical and similar images in a dataset containing auction catalogs from German Sales. We first describe the necessary data preprocessing steps, then describe the feature-based retrieval and conclude with some remarks on the demo-app which allows users to test the described method.</p>
            <div type="subchapter">
               <head>4.1 Data Preprocessing</head>
               <p>Before we can store the feature representations of objects depicted in the auction catalogs, we must prepare the catalogs and crop the illustrations. First, we parse the PDF-files of the two bibliographies<note type="footnote"> Cf. <ref type="bibliography" target="#baehr_sales_2013">Bähr 2013</ref>; <ref type="bibliography" target="#bommert_sales_2019">Bommert 2019</ref>.</note> and convert the information into a structured, machine-readable format. <ref type="graphic" target="#tab001">Table&#160;1</ref> lists the extracted metadata fields describing the catalogs.</p>
               <table xml:id="tab001">
                  <row>
                     <cell>
                        <hi rend="bold">Field Name</hi>
                     </cell>
                     <cell>
                        <hi rend="bold">Explanation</hi>
                     </cell>
                  </row>
                  <row>
                     <cell>title</cell>
                     <cell>Title of the catalog in the bibliography.</cell>
                  </row>
                  <row>
                     <cell>location</cell>
                     <cell>Place where the auction took place.</cell>
                  </row>
                  <row>
                     <cell>year</cell>
                     <cell>Year of the auction. Exact dates are transformed to years.</cell>
                  </row>
                  <row>
                     <cell>types</cell>
                     <cell>List of object types offered in the auction as specified in the bibliography.</cell>
                  </row>
                  <row>
                     <cell>uri</cell>
                     <cell>Permanent link to the catalog entry in <title>German Sales</title>.</cell>
                  </row>
                  <row>
                     <cell>fn</cell>
                     <cell>File name of the downloaded catalog PDF used for further processing.</cell>
                  </row>
                  <trailer>
                     <ref type="intern" target="#tab1">Table&#160;1</ref>: List of metadata extracted from the auction catalog bibliographies (cf. <ref type="bibliography" target="#baehr_sales_2013">Bähr 2013</ref>; <ref type="bibliography" target="#bommert_sales_2019">Bommert 2019</ref>).
                  </trailer>
               </table>
               <p>In a second step, we filter the catalogs using the metadata and process them further. Due to computational constraints, we initially limit our scope to a smaller number of catalogs, focusing on locations in Switzerland. Additionally, we narrow our selection to catalogs covering sales of ›Gemälde‹ as specified by the keywords in the bibliographies. This process results in a set of 86 catalogs, with an additional 25 catalogs which are included later for evaluation. We plan to extend the dataset in future work, eventually covering all catalogs in <title>German Sales</title>. </p>
               <p>Using the links provided in the bibliographies, we download the catalogs from the online services of the Heidelberg Library and convert each page to an image file using the <ref target="https://github.com/pymupdf/PyMuPDF">PyMuPDF</ref>
                  <note type="footnote"> Cf. <ref type="bibliography" target="#pymupdf_2024">PyMuPDF 2024</ref>.</note> library. To minimize memory use, we store the pages as JPEG-files with 95&#160;% export quality compression. Subsequently, we employ the <ref target="https://huggingface.co/Ultralytics/YOLOv8">YOLO-v8x</ref><note type="footnote"> Cf. <ref type="bibliography" target="#jocher_et_al_ultralytics_2023">Jocher et&#160;al. 2023</ref>.</note> object detection algorithm to automatically detect images of objects on the pages, crop the detected images and save them separately. To train the detection algorithm, we manually label a set of 181 catalog pages with the positions of 316 depicted objects. We set aside 33 pages for validation (containing 60 objects) and train the algorithm with the remaining 148 annotated pages (256 objects). Finally, the cropped object depictions are converted to grayscale and rotated to align with the longer edge, ensuring a consistent representation in terms of print color and orientation. <ref type="graphic" target="#aiding_provenance_005">Fig.&#160;5</ref> provides a complete overview of the preprocessing steps described above. </p>
               <figure>
                  <graphic xml:id="aiding_provenance_005" url="Medien/aiding_provenance_005.png">
                     <desc>
                        <ref type="intern" target="#abb5">Figure&#160;5</ref>: Complete pipeline of preprocessing steps applied to the auction catalogs prior to extracting features from the depicted objects. [Graphic: Mathias Zinnen&#160;/ Sabine Lang 2024]</desc>
                  </graphic>
               </figure>
            </div>
            <div type="subchapter">
               <head>4.2 Feature-Based Retrieval</head>
               <figure>
                  <graphic xml:id="aiding_provenance_006" url="Medien/aiding_provenance_006.png">
                     <desc>
                        <ref type="intern" target="#abb6">Figure&#160;6</ref>: Process employed to perform an image-based search for identical or similar images in auction catalogs. [Graphic: Mathias Zinnen&#160;/ Sabine Lang 2024]</desc>
                  </graphic>
               </figure>
               <p><ref type="graphic" target="#aiding_provenance_006">Fig.&#160;6</ref> illustrates the process of feature extraction and query employed for the reverse image search in the auction catalogs. To prepare the system, the previously cropped images from the auction catalogs are used as an input to various feature extraction methods to compile a database of feature vectors as shown in the top row of <ref type="graphic" target="#aiding_provenance_006">Fig.&#160;6</ref>. Specifically, we employ three different <ref target="https://huggingface.co/microsoft/resnet-50">ResNet-50</ref><note type="footnote"> Cf. <ref type="bibliography" target="#he_et_al_learning_2016">He et&#160;al. 2016</ref>.</note> feature extractors, each pre-trained for different tasks:</p>
               <list type="ordered">
                  <item>Classification of 1,000 classes in the ImageNet<note type="footnote"> Cf. <ref type="bibliography" target="#deng_et_al_imagenet_2009">Deng et&#160;al. 2009</ref>, model weights obtained from the mmpretrain framework (cf. <ref type="bibliography" target="#openmmlab_pretraining_2024a">OpenMMLab Pre-Training 2024</ref>).</note> dataset,</item>
                  <item>Detection of smell-related objects in a dataset of historical artworks (ODOR)<note type="footnote"> Cf. <ref type="bibliography" target="#zinnen_et_al_odor_2022">Zinnen et&#160;al. 2022b</ref>; <ref type="bibliography" target="#zinnen_et_al_gestures_2025">Zinnen et&#160;al. 2025</ref>, models trained by the authors.</note>,</item>
                  <item>Recognition of 17 pose estimation keypoints in the COCO<note type="footnote"> Cf. <ref type="bibliography" target="#lin_et_al_microsoft_2014">Lin et&#160;al. 2014</ref>, models obtained from the mmpose framework (cf. <ref type="bibliography" target="#openmmlab_pose_2024b">OpenMMLab Pose 2024</ref>). </note> dataset.</item>
               </list>
               <p>The motivation behind selecting these three pre-training schemes is to evaluate whether the choice of extraction method can influence which images are found to be similar. The underlying hypothesis is that the embeddings extracted from the images can be related back to the task the extraction model was originally trained for. Accordingly, pose recognition embeddings would be expected to emphasize the body posture of depicted persons. ImageNet embeddings, on the other hand, have been shown to generalize towards various tasks and put a specific emphasis on texture versus shape.<note type="footnote"> Cf. <ref type="bibliography" target="#geirhos_et_al_2018">Geirhos et&#160;al. 2018</ref>.</note> The model trained for object detection in premodern artworks was selected to test whether the availability of artistic object representations in the training data supports the models in extracting more relevant embeddings for artwork similarity.</p>
               <p>After deploying the application, users can upload arbitrary images to the system. These images are fed to the feature extractor, similarly to the process during the creation of the feature database. Afterwards, we compute vector distances <formula notation="mathml">
                     <math xmlns="http://www.w3.org/1998/Math/MathML">
                        <mrow>
                           <mi>d</mi>
                           <mspace width="0.167em"/>
                           <mrow>
                              <mo fence="true" form="prefix" stretchy="true">(</mo>
                              <mrow>
                                 <mrow>
                                    <msub>
                                       <mi>v</mi>
                                       <mi>q</mi>
                                    </msub>
                                    <mi>,&#160;</mi>
                                    <msub>
                                       <mi>v</mi>
                                       <mi>i</mi>
                                    </msub>
                                 </mrow>
                              </mrow>
                              <mo fence="true" form="postfix" stretchy="true">)</mo>
                           </mrow>
                        </mrow>
                     </math>
               </formula> between the query vector <formula notation="mathml">
                  <math xmlns="http://www.w3.org/1998/Math/MathML">
                     <msub>
                        <mi>v</mi>
                        <mi>q</mi>
                     </msub>
                  </math>
               </formula> and the <formula notation="mathml">
                  <math xmlns="http://www.w3.org/1998/Math/MathML"><mi>n</mi></math>
               </formula> precomputed vectors <formula notation="mathml">
                  <math xmlns="http://www.w3.org/1998/Math/MathML"><mrow>
                        <msub>
                           <mi>v</mi>
                           <mi>i</mi>
                        </msub>
                        <mi>,&#160;</mi>
                        <mi>i</mi>
                        <mi>&#160;∈&#160;</mi>
                        <mrow>
                           <mo fence="true" form="prefix" stretchy="true">{</mo>
                           <mrow>
                              <mrow>
                                 <mn>1,&#160;2</mn>
                                 <mi>,&#160;</mi>
                                 <mn>...</mn>
                                 <mi>,&#160;</mi>
                                 <mi>n</mi>
                              </mrow>
                           </mrow>
                           <mo fence="true" form="postfix" stretchy="true">}</mo>
                        </mrow>
                  </mrow></math>
               </formula> and return the <formula notation="mathml">
                  <math xmlns="http://www.w3.org/1998/Math/MathML"><mi>k</mi></math>
               </formula> images <formula notation="mathml">
                  <math xmlns="http://www.w3.org/1998/Math/MathML"><mrow>
                        <msub>
                           <mi>I</mi>
                           <mrow>
                              <mn>1</mn>                              
                           </mrow>
                        </msub>
                        <mi>,&#160;</mi>
                        <msub>
                           <mi>I</mi>
                           <mn>2</mn>
                        </msub>
                        <mi>,&#160;</mi>
                        <mn>...</mn>
                        <mi>,&#160;</mi>
                        <msub>
                           <mi>I</mi>
                           <mi>k</mi>
                        </msub>
                  </mrow></math>
               </formula> where the distances between the feature and query vectors are lowest (see bottom row of <ref type="graphic" target="#aiding_provenance_006">Fig.&#160;6</ref>).</p>
               <p>Our approach simplifies the process of image retrieval by using the flattened final feature vector obtained just before the final classification layer of the ResNet-50 architecture, which typically has 2048 dimensions. Incorporating more elaborate strategies and vector representations that account for multiple scales might further enhance performance. This improvement is a potential direction for future work, although it is beyond the scope of this current study.</p>
               <p>For efficient querying, we implement <ref target="https://github.com/facebookresearch/faiss">FAISS (Facebook AI Similarity Search)</ref><note type="footnote"> Cf. <ref type="bibliography" target="#johnson_et_al_similarity_2019">Johnson et&#160;al. 2019</ref>.</note>, which performs initial clustering and hashing in the feature space to speed up the search process. The similarity between vectors is computed using the <term type="dh">Euclidean Distance</term> formula:</p>
               <p><formula notation="mathml">
                     <math xmlns="http://www.w3.org/1998/Math/MathML">
                        <mrow>
                           <mi>d</mi>
                           <mspace width="0.167em"/>
                           <mrow>
                              <mo fence="true" form="prefix" stretchy="true">(</mo>
                              <mrow>
                                 <mrow>
                                    <msub>
                                       <mi>v</mi>
                                       <mi>q</mi>
                                    </msub>
                                    <mi>,&#160;</mi>
                                    <msub>
                                       <mi>v</mi>
                                       <mi>r</mi>
                                    </msub>
                                 </mrow>
                              </mrow>
                              <mo fence="true" form="postfix" stretchy="true">)</mo>
                           </mrow>
                           <mtext>&#160;=&#160;</mtext>
                           <msqrt>
                              <mrow>
                                 <munderover>
                                    <mo stretchy="false">∑</mo>
                                    <mrow>
                                       <mi>i</mi>
                                       <mtext>&#160;=&#160;</mtext>
                                       <mn>1</mn>
                                    </mrow>
                                    <mn>2048</mn>
                                 </munderover>
                                 <msup>
                                    <mrow>
                                       <mo stretchy="false">&#160;(</mo>
                                       <msub>
                                          <mi>q</mi>
                                          <mi>i</mi>
                                       </msub>
                                       <mtext>&#160;-&#160;</mtext>
                                       <msub>
                                          <mi>r</mi>
                                          <mi>i</mi>
                                       </msub>
                                       <mo stretchy="false">)</mo>
                                    </mrow>
                                    <mn>2</mn>
                                 </msup>
                              </mrow>
                           </msqrt>
                     </mrow>
                     </math>
               </formula>, </p>
               <p>where <hi rend="italic">q<hi rend="sub">i</hi></hi> and <hi rend="italic">r<hi rend="sub">i</hi></hi> denote the <hi rend="italic">i</hi>-th element of the feature vectors <hi rend="italic">v<hi rend="sub">q</hi></hi> and <hi rend="italic">v<hi rend="sub">r</hi></hi> extracted from the query image <hi rend="italic">I<hi rend="sub">q</hi></hi> and the database image <hi rend="italic">I<hi rend="sub">r</hi></hi>, respectively.</p>
            </div>
            <div type="subchapter">
               <head>4.3 A User Interface for Image Search in Auction Catalogs</head>
               <p>In order to test our method and eventually enable its use by provenance researchers, we develop a user interface with the open source package <ref target="https://www.gradio.app/">Gradio</ref>.<note type="footnote"> Cf. <ref type="bibliography" target="#abid_et_al_gradio_2019">Abid et&#160;al. 2019</ref>.</note>
                  <ref type="graphic" target="#aiding_provenance_007">Fig.&#160;7</ref> shows a screenshot of the interface and an exemplary search. Users first select a query image either by simply uploading it or by using the drag-and-drop functionality. They can then select one of the three models discussed above to determine the feature embedding used to compute image similarities. Before users initiate the search, they can adjust the number of results on the right-hand side of the interface. After the search is started, the most similar images to the query are computed using Euclidean Distance (see <ref type="intern" target="#pid31">paragraphs 31–33</ref> for more details). Finally, the search results are displayed underneath the query with additional metadata, including the DOIs of the respective catalogs. </p>
               <figure>
                  <graphic xml:id="aiding_provenance_007" url="Medien/aiding_provenance_007.png">
                     <desc>
                        <ref type="intern" target="#abb7">Figure&#160;7</ref>: Easy-to-use interface developed by using the open source software <ref target="https://www.gradio.app/">Gradio</ref>. [Screenshot: Mathias Zinnen&#160;/ Sabine Lang 2024]</desc>
                  </graphic>
               </figure>
            </div>
         </div>
         <div type="chapter">
            <head>5. Experiments</head>
            <p>We quantitatively measure the performance of the proposed retrieval system for images of objects using common metrics defined below. Furthermore, we assess the performance of the preparatory image detection step.</p>
            <div type="subchapter">
               <head>5.1 Detecting Images in Auction Catalogs </head>
               <p>To measure the performance of the image detection step, we apply <term type="dh">mean average precision</term> as defined in the COCO challenge (<term type="dh">COCO mAP</term>).<note type="footnote"> Cf. <ref type="bibliography" target="#lin_et_al_microsoft_2014">Lin et&#160;al. 2014</ref>.</note> COCO mAP is the most widely used evaluation metric for object detection algorithms to date and realizes a trade-off between precision and recall of detected objects by averaging over multiple confidence and overlap thresholds.<note type="footnote"> A detailed definition is beyond the scope of this work and can be found at <ref type="bibliography" target="#coic_2024">Common Objects in Context 2024</ref>. </note> Using the training and validation splits defined in <ref type="intern" target="#hd8">Section 4.1</ref>, we train the detection algorithm for 50 epochs. We then evaluate the model on the 33 unseen validation pages and achieve a COCO mAP of 98.6&#160;%. Exemplary predictions from the validation split are illustrated in <ref type="graphic" target="#aiding_provenance_008">Fig.&#160;8</ref>.</p>
               <figure>
                  <graphic xml:id="aiding_provenance_008" url="Medien/aiding_provenance_008.png">
                     <desc>
                        <ref type="intern" target="#abb8">Figure&#160;8</ref>: Exemplary predictions from the image detection stage: most images are reliably detected. However, the algorithm has problems differentiating between different object types&#160;/ elements as evidenced by detected carpets, sculptures, and ornamented text (bottom right). [Graphic: Mathias Zinnen&#160;/ Sabine Lang 2024, image source: <ref type="bibliography" target="#germansales_2025">German Sales 2025</ref>] </desc>
                  </graphic>
               </figure>
               <p>The detected depictions of objects other than paintings could likely be corrected by integrating another training stage specifically targeted at recognizing objects which are not paintings. However, in our use case we can tolerate these false predictions as they will not produce visual features similar to any query image. Thus, we do not expect any negative impact apart from a slightly increased memory usage. Generally, the examples visually confirm the strong performance of the detection system, highlighting that the detection of images in auction catalogs is an easy-to-solve problem for modern detection algorithms.</p>
            </div>
            <div type="subchapter">
               <head>5.2 Retrieval</head>
            
            <div type="subchapter">
               <head>5.2.1 Metrics</head>
               <p>We measure the performance of our retrieval system using <term type="dh">top-1 accuracy</term>, <term type="dh">top-5 accuracy</term>, and <term type="dh">mean average precision for retrieval (retrieval mAP)</term>. <term type="dh">Top-k accuracy</term> reflects the percentage of evaluation queries where a target image ranks among the first <hi rend="italic">k</hi> suggestions. For this study, we particularly focus on top-1 and top-5 accuracies, which, in our case, measure how often we find a depiction of the query image in the auction catalogs as the first result or among the first five results, respectively. Retrieval mAP is a standard metric in retrieval tasks that balances precision and recall by averaging the precision achieved at various recall levels <hi rend="italic">r</hi>, similar to COCO mAP.<note type="footnote"> Cf. <ref type="bibliography" target="#lin_et_al_microsoft_2014">Lin et&#160;al. 2014</ref>.</note></p>
               <p>Precision at <hi rend="italic">r</hi> is the proportion of target images correctly identified in the first <hi rend="italic">r</hi> retrieval results. Conversely, recall is the ratio of all target images successfully retrieved by the algorithm. As the number of considered retrieval results <hi rend="italic">r</hi> increases, the precision typically decreases while the recall increases. This is because an expansion of the result set increases the probability that target images are included. Consequently, retrieval mAP is computed as the average of precisions for multiple values of considered retrieval results <hi rend="italic">r</hi>. </p>
               <p>Instead of considering all possible ranks <hi rend="italic">r</hi>, we only consider up to 50 retrieval results (mAP@50). Accordingly, we compute the average precision (AP) for each query artwork <formula notation="mathml">
                     <math xmlns="http://www.w3.org/1998/Math/MathML"><mi>q</mi></math>
               </formula> as follows:</p>
               <p><formula notation="mathml">
                     <math xmlns="http://www.w3.org/1998/Math/MathML"><msub>
                           <mi mathvariant="italic">AP</mi>
                           <mi>q</mi>
                     </msub></math>
               </formula>&#160;= <formula notation="mathml">
                  <math xmlns="http://www.w3.org/1998/Math/MathML"><mrow>
                        <mfrac>
                           <mn>1</mn>
                           <mi>R</mi>
                        </mfrac>
                        <mrow>
                           <munderover>
                              <mo stretchy="false">∑</mo>
                              <mrow>
                                 <mi>r</mi>
                                 <mtext>&#160;=&#160;</mtext>
                                 <mn>1</mn>
                              </mrow>
                              <mn>50</mn>
                           </munderover>
                           <mrow>
                              <msub>
                                 <mi>P</mi>
                                 <mi>q</mi>
                              </msub>
                              <mrow>
                                 <mo fence="true" form="prefix" stretchy="true">(</mo>
                                 <mrow>
                                    <mi>r</mi>
                                 </mrow>
                                 <mo fence="true" form="postfix" stretchy="true">)</mo>
                              </mrow>
                              <mtext>&#160;*&#160;</mtext>
                              <msub>
                                 <mi mathvariant="italic">rel</mi>
                                 <mi>q</mi>
                              </msub>
                              <mrow>
                                 <mo fence="true" form="prefix" stretchy="true">(</mo>
                                 <mrow>
                                    <mi>r</mi>
                                 </mrow>
                                 <mo fence="true" form="postfix" stretchy="true">)</mo>
                              </mrow>
                           </mrow>
                        </mrow>
                  </mrow></math>
               </formula>,</p>
               <p>where:</p>
               <list type="unordered">
                  <item><formula notation="mathml"><math xmlns="http://www.w3.org/1998/Math/MathML">
                           <mi>R</mi>
                  </math></formula> is the number of relevant matches in the corpus, </item>
                  <item><formula notation="mathml"><math xmlns="http://www.w3.org/1998/Math/MathML">
                           <mrow>
                              <msub>
                                 <mi>P</mi>
                                 <mi>q</mi>
                              </msub>
                              <mrow>
                                 <mo fence="true" form="prefix" stretchy="true">(</mo>
                                 <mrow>
                                    <mi>r</mi>
                                 </mrow>
                                 <mo fence="true" form="postfix" stretchy="true">)</mo>
                              </mrow>
                           </mrow>
                  </math></formula> is the precision at <hi rend="italic">r</hi> (i.e. the fraction of correctly retrieved images at rank <hi rend="italic">r</hi>),
                  </item>
                  <item><formula notation="mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><mrow>
                              <msub>
                                 <mi mathvariant="italic">rel</mi>
                                 <mi>q</mi>
                              </msub>
                              <mrow>
                                 <mo fence="true" form="prefix" stretchy="true">(</mo>
                                 <mrow>
                                    <mi>r</mi>
                                 </mrow>
                                 <mo fence="true" form="postfix" stretchy="true">)</mo>
                              </mrow>
                  </mrow></math></formula> is an indicator function that returns 1 if the retrieved image at rank <hi rend="italic">r</hi> is a depiction of the same image and 0 otherwise.<note type="footnote"> Cf. <ref type="bibliography" target="#zhou_et_al_advance_2017">Zhou et&#160;al. 2017</ref>, p. 14.</note>
                  </item>
               </list>
               <p>The mean average retrieval precision (mAP) is then computed as the mean of AP over all <formula notation="mathml"><math xmlns="http://www.w3.org/1998/Math/MathML"><mi>Q</mi></math></formula> evaluation queries:</p> 
                  <p>
                     <formula notation="mathml">
                     <math xmlns="http://www.w3.org/1998/Math/MathML"><mrow>
                           <mi mathvariant="italic">mAP</mi>
                           <mtext>&#160;=&#160;</mtext>
                           <mfrac>
                              <mn>1</mn>
                              <mi>Q</mi>
                           </mfrac>
                           <mrow>
                              <munderover>
                                 <mo stretchy="false">∑</mo>
                                 <mrow>
                                    <mi>q</mi>
                                    <mtext>&#160;=&#160;</mtext>
                                    <mn>1</mn>
                                 </mrow>
                                 <mi>Q</mi>
                              </munderover>
                              <msub>
                                 <mi mathvariant="italic">AP</mi>
                                 <mi>q</mi>
                              </msub>
                           </mrow>
                     </mrow></math>
               </formula>.</p>
            </div>
            <div type="subchapter">
               <head>5.2.2 Results</head>
               <p>We evaluate our method with <formula notation="mathml">
                     <math xmlns="http://www.w3.org/1998/Math/MathML"><mrow>
                           <mi>Q</mi>
                           <mtext>&#160;=&#160;</mtext>
                           <mn>18</mn>
                     </mrow></math>
               </formula> images. As queries, we select already available digital images of artworks; these are not included in our corpus of auction catalogs. The distribution of corresponding images <formula notation="mathml">
                  <math xmlns="http://www.w3.org/1998/Math/MathML"><mi>R</mi>
                  </math>
               </formula> in the corpus for the evaluation images is as follows: 15 images of artworks have one matching target image, two images have two matching target images, and one has three. To enable a convenient reproduction of the evaluation, we include all evaluation images as examples in the app. </p>
               <p>
                  <ref type="graphic" target="#tab002">Table&#160;2</ref> presents top-1 and top-5 accuracies, along with the retrieval mAP for the three pre-training schemes detailed in <ref type="intern" target="#hd8">section 4.1</ref>. All models share the same ResNet-50 architecture but vary in their pre-training. No specific fine-tuning was applied to the models for the retrieval task. </p>
               <table xml:id="tab002">
                  <row>
                     <cell>
                        <hi rend="bold">Training Dataset</hi>
                     </cell>
                     <cell>
                        <hi rend="bold">Top-1 Acc.</hi>
                     </cell>
                     <cell>
                        <hi rend="bold">Top-5 Acc.</hi>
                     </cell>
                     <cell>
                        <hi rend="bold">mAP</hi>
                     </cell>
                  </row>
                  <row>
                     <cell>ImageNet</cell>
                     <cell>72.2</cell>
                     <cell>88.9</cell>
                     <cell>73.3</cell>
                  </row>
                  <row>
                     <cell>Arts</cell>
                     <cell>77.8</cell>
                     <cell>83.3</cell>
                     <cell>72.2</cell>
                  </row>
                  <row>
                     <cell>POSES</cell>
                     <cell>33.3</cell>
                     <cell>38.7</cell>
                     <cell>29.6</cell>
                  </row>
                  <trailer>
                     <ref type="intern" target="#tab2">Table&#160;2</ref>: The table shows the metrics for our method where the extraction network was trained on the <term type="dh">ImageNet</term>, <term type="dh">Arts</term> or <term type="dh">Poses</term> datasets. While for the <term type="dh">top-1 accuracy</term> we achieve best results with the Arts model, the ImageNet model scores the best <term type="dh">top-5 accuracy</term> and <term type="dh">mAP</term>.</trailer>
               </table>
               <p>Comparing the ImageNet and Arts pre-trained models, we do not see strong differences. While Arts pre-training shows slightly better performance in top-1 accuracy, ImageNet pre-training yields higher top-5 accuracy and retrieval mAP. From the two models evaluated, we cannot conclude that pre-training within the target domain (premodern paintings vs. photographs) increases the retrieval performance. Instead, these results suggest that the features learned from ImageNet classification are sufficiently generic to capture similarity in artistic representations of reality. Qualitative examples which illustrate successful queries are discussed in <ref type="intern" target="#hd16">section 6</ref>.</p>
               <p>The decline in performance for the model pre-trained for pose estimation is striking. We hypothesize that the features learned for the body posture estimation are too specialized towards their original application to effectively capture relevant aspects of artwork similarity such as image composition. They also fail to recognize similarities between landscapes or inanimate objects, such as clouds, trees, houses, and abstract pictorial elements. To validate this hypothesis, we conducted another evaluation with only six images featuring at least one person very prominently in the image and reported the results in <ref type="graphic" target="#tab003">Table&#160;3</ref>. This setting led to an increased performance for all models, particularly for the Arts model, which achieved perfect top-1 accuracy. The pose estimation model also displayed significant improvement, especially when compared to the ImageNet model. This indicates that models trained for pose recognition indeed capture artwork similarity better when persons are depicted. </p>
               <table xml:id="tab003">
                  <row>
                     <cell>
                        <hi rend="bold">Model</hi>
                     </cell>
                     <cell>
                        <hi rend="bold">Top-1 Acc.</hi>
                     </cell>
                     <cell>
                        <hi rend="bold">Top-5 Acc.</hi>
                     </cell>
                     <cell>
                        <hi rend="bold">Retrieval mAP</hi>
                     </cell>
                  </row>
                  <row>
                     <cell>ImageNet</cell>
                     <cell>85.7</cell>
                     <cell>88.1</cell>
                     <cell>85.7</cell>
                  </row>
                  <row>
                     <cell>Arts</cell>
                     <cell>100</cell>
                     <cell>100</cell>
                     <cell>100</cell>
                  </row>
                  <row>
                     <cell>POSES</cell>
                     <cell>71.4</cell>
                     <cell>71.4</cell>
                     <cell>66.3</cell>
                  </row>
                  <trailer>
                     <ref type="intern" target="#tab3">Table&#160;3</ref>: Evaluation of the models’ performances when the query images are restricted to images of artworks with at least one person depicted. We see an increased performance in all models, with the Arts-pre-trained model even achieving perfect retrieval metrics. Specifically, the model trained for pose estimation performs considerably better and partly closes the gap compared to the two other pre-training schemes.
                  </trailer>
               </table>
               <p>Two artworks could not be retrieved by any method (see <ref type="graphic" target="#aiding_provenance_009">Fig.&#160;9</ref>). This drastically lowers the evaluation metrics. To identify the cause, we experimented with different image rotations and compression applied to the query image. However, neither of these measures could resolve the issue. We also confirmed that the images were correctly extracted during the preparatory image detection step (see <ref type="intern" target="#hd12">section 5.1</ref>) to eliminate the possibility that it might have failed. </p>
               <figure>
                  <graphic xml:id="aiding_provenance_009" url="Medien/aiding_provenance_009.png">
                     <desc>
                        <ref type="intern" target="#abb9">Figure&#160;9</ref>: These two artworks drastically lower the quantitative metrics: Adrian Ludwig Richter’s <title>Hirten am Feuer</title> (c. 1861) on the left and Heinrich Bürkel’s <title>Der Kochelsee mit den Häusern von Schlehdorf</title> (c. 1863&#160;/ 1867) on the right. [Graphic: Mathias Zinnen&#160;/ Sabine Lang 2024, image sources: <ref type="bibliography" target="#kettererkunst_auction_2018a">Ketterer Kunst 2018a</ref>; <ref type="bibliography" target="#kettererkunst_auction_2018b">Ketterer Kunst 2018b</ref>]</desc>
                  </graphic>
               </figure>
               <p>Further investigation revealed that the images cropped from the auction catalogs (see <ref type="graphic" target="#aiding_provenance_010">Fig.&#160;10</ref>) were of poor quality, characterized by blurriness and noise. However, as illustrated by the two cropped images of artworks in the second row of <ref type="graphic" target="#aiding_provenance_010">Fig.&#160;10</ref>, poor image quality alone is not a determining factor for the model not being able to find the images. Although they are also of poor quality, the model is able to find the works in the dataset. </p>
               <figure>
                  <graphic xml:id="aiding_provenance_010" url="Medien/aiding_provenance_010.png">
                     <desc>
                        <ref type="intern" target="#abb10">Figure&#160;10</ref>: Crops of the target images of artworks as detected in the auction catalogs. Row (a) shows the images which could not be found by any of the extraction methods. The second row (b) shows detections of an artwork by Max Liebermann with a similarly bad image quality which were found (note also the difference in quality and color between the two identical images). [Graphic: Mathias Zinnen&#160;/ Sabine Lang 2024, image source: <ref type="bibliography" target="#germansales_2025">German Sales 2025</ref>]</desc>
                  </graphic>
               </figure>
               <p>In the following, we present two case studies to illustrate how the presented method can be successfully used to search for identical and similar images. This way, the method not only aids provenance researchers with reconstructing an object’s origin but also facilitates the study of visual patterns.</p>
            </div>
            </div>
         </div>
         <div type="chapter">
            <head>6. Case Studies</head>
            <div type="subchapter">
               <head>6.1 Case Study: Retrieving Identical Images</head>
               <p>In this section we want to demonstrate the potential and performance of our method to search for identical images in auction catalogs, thus assisting with the reconstruction of an object’s provenance. To perform the search, we use the demo-app which we have introduced in <ref type="intern" target="#hd10">section 4.3</ref>. In June 2024, the Munich-based auction house Neumeister offered a work by Johann Sperl (1840–1914) entitled <title>Sommerlust</title>.<note type="footnote"> Cf. <ref type="bibliography" target="#neumeister_2024">Neumeister 2024</ref>.</note> Can we find out more about the painting’s provenance using our method? To this end, we perform a search in our collected data set using the app. We upload the query image, set the number of results to ten and initiate the search. The results appear after a few seconds underneath the query image (<ref type="graphic" target="#aiding_provenance_011">Fig.&#160;11</ref>). The first three results are identical to the query image, suggesting that the painting was offered in three auctions. The first result refers to an auction which took place on April 24, 1928, in the Kunsthaus Lempertz in Cologne.<note type="footnote"> Cf. <ref type="bibliography" target="#khlempertz_auction_1928a">Kunsthaus Lempertz 1928a</ref>.</note> The painting was listed as lot 32, titled <title>Kinder auf der Wiese</title><note type="footnote"> Cf. <ref type="bibliography" target="#khlempertz_auction_1928b">Kunsthaus Lempertz 1928b</ref>. </note> and displayed on panel 13.<note type="footnote"> Cf. <ref type="bibliography" target="#khlempertz_auction_1928c">Kunsthaus Lempertz 1928c</ref>.</note> Notably, the name differs from the current title, highlighting the issue of a text-based search. Two years later, on November 14, 1930, Sperl’s painting was included in an auction at Paul Cassirer’s <title>Kunstsalon</title> in Berlin.<note type="footnote"> Cf. 
<ref type="bibliography" target="#kpc_auction_1930a">Kunstsalon Paul Cassirer 1930a</ref>.</note> The respective auction catalog shows the work on page 88. Back then, however, it was entitled <title>Kind auf der Wiese</title>&#160;– a slight variation of the 1928-title.<note type="footnote"> Cf. <ref type="bibliography" target="#kpc_auction_1930b">Kunstsalon Paul Cassirer 1930b</ref>.</note> The last result points to an auction at Hugo Helbing’s gallery on March 26, 1927&#160;– the earliest auction date in our list of results.<note type="footnote"> Cf. <ref type="bibliography" target="#ahh_auction_1927a">Auktionshaus Hugo Helbing 1927a</ref>.</note> The corresponding catalog lists the work as <title>Sommerlust</title> (lot 106) on page 22<note type="footnote"> Cf. <ref type="bibliography" target="#ahh_auction_1927b">Auktionshaus Hugo Helbing 1927b</ref>. </note> and includes a reproduction of the painting on panel 16.<note type="footnote"> Cf. <ref type="bibliography" target="#ahh_auction_1927c">Auktionshaus Hugo Helbing 1927c</ref>.</note> The example of Johann Sperl demonstrates the efficiency of our method and is indeed confirmed by the provenance information given on the website of Neumeister which mentions all three provenances.<note type="footnote"> Cf. <ref type="bibliography" target="#neumeister_2024">Neumeister 2024</ref>.</note>
               </p>
               <p>If we look at the calculated feature distance, we notice that the first result (Lempertz) had a distance of 257, the second (Cassirer) a distance of 259 and the last (Helbing) a calculated distance of 448 to the query image. All images show the same content as the query; why, then, do they have different distances? Here is a possible explanation: The coloration of the images varies slightly, as does the rotation (Helbing). These deviations from the query image might cause the observed distances, especially for the Helbing example.<note type="footnote">It is unclear whether the coloration of the pages is present in the originals or a result of the digitization process and only visible in the digital reproductions.</note>
               </p>
               <figure>
                  <graphic xml:id="aiding_provenance_011" url="Medien/aiding_provenance_011.png">
                     <desc>
                        <ref type="intern" target="#abb11">Figure&#160;11</ref>: Search results for Johann Sperl’s <hi rend="italic">Sommerlust</hi>: The work appeared in a catalog in (a) 1928, (b) 1930 and (c) 1927. [Graphic: Mathias Zinnen&#160;/ Sabine Lang 2024]</desc>
                  </graphic>
               </figure>
            </div>
            <div type="subchapter">
               <head>6.2 Case Study: Retrieving Similar Images</head>
               <p>For many visual disciplines such as art history it is not only relevant to find identical but also similar images. Finding these similarities offers insights into reception processes over time and space, prevailing taste, artistic networks and focuses of auction houses and collectors. Can we use our method to address the following questions: </p>
               <list type="ordered">
                  <item>In which contexts does the motif of the boat appear? </item>
                  <item>What can be said about the drawing style of Max Liebermann (1847–1935)? </item>
               </list>
            
            <div type="subchapter">
               <head>6.2.1 The Motif of the Boat</head>
               <p>In order to answer the first question, we choose Andreas Achenbach’s (1815–1910) painting <title>Fischerboot auf stürmischer See</title> (1895) as a query image and initiate the search (<ref type="graphic" target="#aiding_provenance_012">Fig.&#160;12</ref>). The first result is identical to the query, therefore we disregard this image in our analysis. </p>
               <figure>
                  <graphic xml:id="aiding_provenance_012" url="Medien/aiding_provenance_012.png">
                     <desc>
                        <ref type="intern" target="#abb12">Figure&#160;12</ref>: The first search results (excluding the first&#160;/ identical) for Andreas Achenbach’s <hi rend="italic">Fischerboot auf stürmischer See</hi>: The paintings were offered at auctions in (a) 1936, (b) 1931 and (c) 1928. [Graphic: Mathias Zinnen&#160;/ Sabine Lang 2024]</desc>
                  </graphic>
               </figure>
               <p>The second image (a) shows a boat close to the shore, embedded within a mountain view. It was painted by Max Buri (1868–1915) and offered as <title>Brienzersee</title> (1894) in an auction hosted by the Gallery Neupert in Zurich on April 4, 1936.<note type="footnote"> Cf. <ref type="bibliography" target="#neupert_auction_1936">Galerie Neupert 1936</ref>. </note> Similar to the query, the boat is shown at a central position; however, the stormy water and atmosphere are replaced by calmness and tranquility. In addition, Buri’s painting is devoid of any human life. The third result shows a similar visual pattern: A boat floats on a calm lake at a central position, surrounded by a deserted landscape, mountain range, small houses and shore. The image was painted by Otto Frölicher (1840–1890) and offered as <title>Barken</title> in an auction catalog of G. &amp; L. Bollag in Zurich in 1931.<note type="footnote"> Cf. <ref type="bibliography" target="#bollag_auction_1931">G. &amp; L. Bollag 1931</ref>.</note> These first results suggest a content similarity, possibly influenced by the motifs of the boat and&#160;/ on water. The last result discussed in this case study stems from a catalog of the <title>Kunstsalon Dr. Störi</title> in Zurich; the respective auction took place in March 1928.<note type="footnote"> Cf. <ref type="bibliography" target="#kstoeri_auction_1928a">Kunstsalon Dr. Störi 1928a</ref>. </note> The image shows a painting by Armand Guillaumin (1841–1928) and is entitled <title>Der Kran</title>.<note type="footnote"> Cf. <ref type="bibliography" target="#kstoeri_auction_1928b">Kunstsalon Dr. Störi 1928b</ref>. </note> We see a crane vessel in the middle ground, another one is visible in the background. The right side of the image displays piles of sand and two standing figures who turn their back towards the viewer. 
Compared to the first results, the image conveys a sense of motion and liveliness; this is accentuated by the painting style (the reproduction suggests visible brushstrokes and different color regions). In addition to the content similarity, the last example thus suggests a similarity based on the mood and effect of the image. This concise study suggests that the motif of the boat mainly appears within a landscape setting which shows very few signs of human life. If we look at their time of creation and the life dates of the artists, we can observe that all paintings were created in the late 19<hi rend="super">th</hi> century and early 20<hi rend="super">th</hi> century (Guillaumin), thus suggesting a preference for the motif during that time. </p>
            </div>
            <div type="subchapter">
               <head>6.2.2 The style of Max Liebermann</head>
               <p>Max Liebermann is one of the most important artists of the 19<hi rend="super">th</hi> and 20<hi rend="super">th</hi> centuries; he is known for his elaborate oil paintings as well as for his delicate drawings and sketches. We utilize the suggested method to gain more insight into Liebermann’s drawings by looking at similar images (see question two). We take his chalk drawing <title>Einholung Bismarcks in Berlin</title> (1890) as a query. Interestingly, all first results stem from an auction held at the <title>Kunstsalon</title> of Paul Cassirer in March 1925 (<ref type="graphic" target="#aiding_provenance_013">Fig.&#160;13</ref>).<note type="footnote"> We disregard the first result, because it is identical to the query image.</note> The title of the catalog already indicates that the auction only included drawings by Liebermann (namely 316 works).<note type="footnote"> Cf. <ref type="bibliography" target="#kpc_auction_1925">Kunstsalon Paul Cassirer 1925</ref>.</note> While this result does not enable us to study similar drawings by other artists, it is interesting, because it suggests a strong individual artistic style. According to the results Liebermann is most similar to himself. Further analysis might study the drawings in more detail and look at the motifs, composition or textures he used.</p>
               <p>Both case studies demonstrate the potential of our method to search for identical and similar images in a large dataset, thus not only assisting provenance researchers but any discipline interested in visual patterns over time and space. The examples of Achenbach and Liebermann highlighted that by searching for similar images, we can address a variety of research questions which go beyond the provenance of objects. </p>
               <figure>
                  <graphic xml:id="aiding_provenance_013" url="Medien/aiding_provenance_013.png">
                     <desc>
                        <ref type="intern" target="#abb13">Figure&#160;13</ref>: Search results for Liebermann’s drawing <hi rend="italic">Einholung Bismarcks in Berlin</hi> (1890). All results appear in an auction catalog published by the <title>Kunstsalon Paul Cassirer</title> in March 1925. [Graphic: Mathias Zinnen&#160;/ Sabine Lang 2024]</desc>
                  </graphic>
               </figure>
            </div>
            </div>
         </div>
         <div type="chapter">
            <head>7. Similarity</head>
            <p>The previous sections described how our method allows to search for identical and similar images based on a given query. Thus, the term <term type="dh">similar</term> featured often. Although many scholars emphasize the blurriness of the term, <term type="dh">similarity</term> is a widely discussed concept and has been addressed within art history<note type="footnote"> Cf. <ref type="bibliography" target="#gaier_et_al_similtudo_2012">Gaier et&#160;al. 2012</ref>. </note> and media studies, among others.<note type="footnote"> Cf. <ref type="bibliography" target="#winkler_aehnlichkeit_2021">Winkler 2021</ref>.</note> Dorothee Kimmich offers a comprehensive introduction to the concept of similarity, particularly within modernity.<note type="footnote"> Cf. <ref type="bibliography" target="#kimmich_ungefaehre_2017">Kimmich 2017</ref>.</note> In her book, Kimmich highlights the general unpopularity of vague terms such as similarity: Philosophy proclaims its unusability and cultural studies consider it historically outdated.<note type="footnote"> Cf. <ref type="bibliography" target="#kimmich_ungefaehre_2017">Kimmich 2017</ref>, p. 15.</note> Critics argue that since everything can be similar to everything else in some way, no new knowledge is produced.<note type="footnote"> Cf. <ref type="bibliography" target="#kimmich_ungefaehre_2017">Kimmich 2017</ref>, pp.&#160;18–19.</note> Depending on which criteria are picked, things can either be similar or dissimilar.<note type="footnote"> Cf. <ref type="bibliography" target="#kimmich_ungefaehre_2017">Kimmich 2017</ref>, p. 14. Kimmich discusses that in the context of social gender roles and biological sexes.</note> Today, similarity is seen as a mental and subjective concept that enables and organizes recognition.<note type="footnote"> Cf. <ref type="bibliography" target="#kimmich_ungefaehre_2017">Kimmich 2017</ref>, p. 
34.</note> While the concept of similarity is often rejected and criticized, its vagueness and blurriness might also provide a chance to explore and discuss its meaning within a specific (thematic) context, free from prior assumptions or guiding definitions. This paper makes use of this degree of freedom by looking at <term type="dh">similarity</term> in relation to the method employed. Related terms such as mimesis, imitation&#160;/ mimicry, similitudo, or iconicity will not be discussed, as it would go beyond the scope of this paper.<note type="footnote">Similitudo, for example, plays a central role for the genre of portraiture; a fundamental requirement for a portrait is that it resembles the subject&#160;/ person (cf. <ref type="bibliography" target="#gaier_et_al_similtudo_2012">Gaier et&#160;al. 2012</ref>). Mimesis has existed as a classical category since antiquity and is, according to Hartmut Winkler, connected to the question of how art and the media relate to the world (cf. <ref type="bibliography" target="#winkler_aehnlichkeit_2021">Winkler 2021</ref>, p. 283). Iconicity then also refers to the fact that images (i.&#160;e. photos) are similar to what is shown (cf. <ref type="bibliography" target="#winkler_aehnlichkeit_2021">Winkler 2021</ref>, p. 47). See the following sources for more information: <ref type="bibliography" target="#winkler_aehnlichkeit_2021">Winkler 2021</ref>, pp.&#160;11–12, 47, 283; <ref type="bibliography" target="#kimmich_ungefaehre_2017">Kimmich 2017</ref>, pp.&#160;15, 21; <ref type="bibliography" target="#gaier_et_al_similtudo_2012">Gaier et&#160;al. 2012</ref>.</note> After providing some general remarks, we focus on two aspects of similarity: quantification and the process of abstraction. </p>
            <div type="subchapter">
               <head>7.1 General Remarks on Similarity</head>
               <p>Comparative processes are integral to determining similarity.<note type="footnote">In his book, Winkler mentions that attesting similarity requires comparative processes; these processes, however, can happen unconsciously (cf. <ref type="bibliography" target="#winkler_aehnlichkeit_2021">Winkler 2021</ref>, pp.&#160;111–112).</note> Humans, for example, compare paintings to reach a conclusion about their similarity. This process is guided by various criteria, which are manifold, subjective and often difficult to grasp. These criteria might include specific motifs, the image composition, color, forms, texture or artist, time period and location and thus encompass both internal and external criteria of the image. Our proposed method suggests a similarity based on image-inherent, visual criteria. This similarity might be determined by global image structures, numerous objects and their arrangement, or singular objects. Following this approach, we understand similarity as determined by criteria inherent and visible in an image. Thus, external criteria described for example by metadata, such as artist, title, year, or technique, are disregarded.</p>
               <p>While our method focuses on internal criteria, it still allows us to explore different similarity dimensions: By selecting a suitable pre-training method, the computer scientist can try to influence the type of similarity underlying the retrieval process. For example, we can assume that an extraction network trained to recognize body postures will put more weight on this aspect of similarity. Therefore, the process of attesting similarity is already influenced during pre-training. <ref type="graphic" target="#aiding_provenance_014">Fig.&#160;14</ref> illustrates this: A search for Ludwig von Zumbusch’s <title>Einsames Land</title> (1896) leads to different results depending on the embedding type selected by the user. While results for the models ImageNet and Arts show a clear preference for landscapes, the Poses model prefers images with persons and objects. In general, it can be observed that using the Poses model leads to worse results than utilizing the ImageNet and Arts models. We illustrate this by searching for Sperl’s <title>Sommerlust</title> using all three embedding types (see <ref type="graphic" target="#aiding_provenance_015">Fig.&#160;15</ref>). Models trained on ImageNet and Arts retrieve all three instances of Sperl’s painting in the dataset, while the Poses model only finds one instance. </p>
               <figure>
                  <graphic xml:id="aiding_provenance_014" url="Medien/aiding_provenance_014.png">
                     <desc>
                        <ref type="intern" target="#abb14">Figure&#160;14</ref>: Search results using different embedding types; all models were trained on different data, thus leading to different search results and allowing a focus on diverse similarity criteria (according to our hypothesis). [Graphic: Mathias Zinnen&#160;/ Sabine Lang 2024]</desc>
                  </graphic>
               </figure>
               <figure>
                  <graphic xml:id="aiding_provenance_015" url="Medien/aiding_provenance_015.png">
                     <desc>
                        <ref type="intern" target="#abb15">Figure&#160;15</ref>: Search results using different embedding types; the models <term type="dh">ImageNet</term> and <term type="dh">Arts</term> perform best for the task of finding identical images to Johann Sperl’s <hi rend="italic">Sommerlust</hi>. [Graphic: Mathias Zinnen&#160;/ Sabine Lang 2024]</desc>
                  </graphic>
               </figure>
               <p>In our method, we derive quantifiable, vector-shaped representations of image content and calculate the distance between two vectors using an arbitrary distance metric (i.&#160;e. Euclidean Distance). We assume that this distance approximates the degree of similarity between the images. However, the computation of the distance between two feature vectors does not provide any insights into interpretable similarity criteria; knowing the similarity distance function does not explain why two pictures might be perceived as similar, while others are seen as different.<note type="footnote"> While we can influence the distance computation by selecting a different distance metric (i.&#160;e. Euclidean Distance) to some degree, the general tendency will remain similar.</note> To understand what makes artworks similar from a data-driven perspective, we must consider the properties of the feature space. This requires discussing the function that translates pixel-based image representations into feature vectors. We refer to this process as quantification.</p>
            </div>
            <div type="subchapter">
               <head>7.2 Quantification </head>
               <p>According to the <title>Duden</title> (dictionary of the German language), <term type="dh">quantification</term> means the transformation of qualities into quantities, for example the properties of something (here: an image) in numbers and measurable values.<note type="footnote"> Cf. <ref type="bibliography" target="#dudenredaktion_quantifizierung_2024">Dudenredaktion (ed.) 2024</ref>.</note> In this paper, quantification can relate to two different aspects, namely the process of translating an image into feature vectors and the fact that similarity is computed which requires a quantification process. </p>
               <p>In order to search for identical and similar images, we utilize a neural network to extract feature vectors which are essentially numerical representations of a group of features which describe an image (such as colors, edges, or objects). Thus, these features quantify and represent the content of an image. During the search process, the feature vector of the given query image is compared to all feature vectors stored in the feature database and their distances are calculated using the Euclidean Distance. The closer the distance, the more similar the images are according to the algorithm. Therefore, similarity is not based on a subjective impression, but on measurable, comparable, and objective numerical values. Since feature vectors are extracted from input data&#160;– in our case digital images&#160;– the question arises whether the similarity measure depends on the image quality and can therefore vary even for images which appear identical or similar to the human eye (see case study on Johann Sperl, <ref type="intern" target="#hd17">section 6.1</ref>). For example, different reproduction and digitization techniques might result in different color representations or image contrasts which might influence the feature vectors and calculations respectively. <ref type="graphic" target="#aiding_provenance_010">Fig.&#160;10</ref> shows that even for the same image the quality and color of the reproduction differs significantly.</p>
               <p>Cognitive psychology picks up on the idea that similarity is founded in and expressed by numerical values. Psychological processes are modeled using computers; this means that only processes that can be expressed in variables and algorithms are represented. Accordingly, features are variables to which numerical values are assigned allowing computation. Essentially, for Cognitive Psychology, similarity is characterized as a ›feature overlap‹<note type="footnote"> Cf. <ref type="bibliography" target="#winkler_aehnlichkeit_2021">Winkler 2021</ref>, pp.&#160;96–97.</note>: <quote>People notice that a number of objects overlap substantially and proceed to form a category to include these items. […] Categorization is justified by the observation that objects tend to cluster in terms of their attributes, be these physical features, linguistic labels […].</quote><note type="footnote">
                     <ref type="bibliography" target="#anderson_nature_1991">Anderson 1991</ref>, p. 411.</note> As Winkler concludes in his book on similarity, the computer provides the frame in which cognitivists think about similarity.<note type="footnote"> Cf. <ref type="bibliography" target="#winkler_aehnlichkeit_2021">Winkler 2021</ref>, p. 98.</note> The same can be said about our project, since the method guides and essentially dictates how we think and write about similarity. </p>
            </div>
            <div type="subchapter">
               <head>7.3 Process of Abstraction</head>
               <p>In his book, Winkler emphasizes that comparison and similarity separate things into aspects (features) which are similar and dissimilar.<note type="footnote"> Cf. <ref type="bibliography" target="#winkler_aehnlichkeit_2021">Winkler 2021</ref>, p. 257.</note> Human observation, according to him, does not remain with the things themselves, but rather moves on to their properties and characteristics.<note type="footnote"> Cf. <ref type="bibliography" target="#winkler_aehnlichkeit_2021">Winkler 2021</ref>, p. 93.</note> Accordingly, similarity abstracts something from things (the similar thing eventually results in a form; form then plays a crucial role for him). Thus, for Winkler, similarity is based on mechanisms of abstraction.<note type="footnote"> Cf. <ref type="bibliography" target="#winkler_aehnlichkeit_2021">Winkler 2021</ref>, pp.&#160;257–258.</note> Accordingly, we understand the ›process of abstraction‹ as a process in which certain criteria of the input data are abstracted to determine if things are similar or dissimilar. As described previously in <ref type="intern" target="#hd9">section 4.2</ref>, an image-based search requires the extraction of features from the data. Ideally, these features describe and represent the (content of the) data. This process aligns with Winkler’s conception of similarity, as it is based on specific criteria which are context dependent. This process of abstraction in the machine mirrors the human approach to recognizing similarities, most evidently illustrated by the conversion of input data into a lower-dimensional vector representation during feature extraction. This representation retains the essential information needed for a given task&#160;– in this case, retrieval&#160;– while disregarding unimportant information. This process is thus associated with a loss of (certain) information and potentially with a loss of possible similarity criteria. </p>
               <p>Winkler also raises the question whether similarity implies that our attention is focused on certain criteria: What exactly is guiding our attention and selection?<note type="footnote"> Cf. <ref type="bibliography" target="#winkler_aehnlichkeit_2021">Winkler 2021</ref>, p. 295.</note> For humans, the cultural and societal context as well as personal experiences and preferences play a crucial role.<note type="footnote">American philosopher Nelson Goodman famously stated that <quote>Circumstances alter similarities</quote> (quoted in <ref type="bibliography" target="#kimmich_ungefaehre_2017">Kimmich 2017</ref>, p. 24), thus emphasizing the context-dependency of similarity judgments (cf. <ref type="bibliography" target="#winkler_aehnlichkeit_2021">Winkler 2021</ref>, p. 94).</note> As mentioned before, the computer scientist can try to influence the type of similarity underlying the retrieval process by selecting a suitable pre-training method. We can assume, for example, that an extraction network trained to recognize body postures will put more weight on these similarity criteria (see results <ref type="graphic" target="#aiding_provenance_014">Fig.&#160;14</ref>). If we interpret feature extraction as an abstraction in Winkler’s sense, selecting a specific pre-training method determines the understanding of similarity underlying the retrieval system. Selecting a specific method can then be correlated to the type of information we are ›omitting‹ during the translation of pixels into a feature vector. Whenever we select a specific method for this translation, we also decide which features are deemed irrelevant by the models. Similarity in this way is always influenced by the choice we make regarding the pre-training method. This means that the attention and selection process of the network is guided by human input, often motivated by the research question, task or personal interests. 
In comparison to humans, the attention and selection of similarity criteria in this context is much more conscious and goal-driven.</p>
               <p>We conclude the following: Attesting similarity requires comparative processes separating things into aspects (features), which are similar and dissimilar<note type="footnote"> Cf. <ref type="bibliography" target="#winkler_aehnlichkeit_2021">Winkler 2021</ref>, pp.&#160;111–112, 257.</note>, which in the context of this paper refer to image-internal components. These criteria are abstracted from the image and quantified, thus becoming measurable. We also noted that the image quality might influence the similarity measure. The conversion of input data into a lower-dimensional vector representation during feature extraction is associated with a loss of information and therefore might result in a loss of similarity dimensions. We also elaborated on the fact that the selection of the pre-training method can influence the type of similarity underlying the retrieval process. This section thus highlighted that the computer guides as well as limits our understanding of similarity. Discussing similarity in the context of this paper, with a particular focus on quantification and the process of abstraction, provided interesting insights and leaves room for further discussion and research.</p>
            </div>
         </div>
         <div type="chapter">
            <head>8. Conclusion</head>
            <p>This paper demonstrated the potential of applying machine learning methods for provenance research. We showed how an image-based search in auction catalogs can circumvent the issue of missing information, varying titles or artist attribution and thus assist with the reconstruction of an object’s provenance. The same method might be used to study similar material such as exhibition catalogs, a <term type="dh">catalogue raisonné</term>, magazines, newspapers or photographs. Future work also includes testing other models, such as <term type="dh">CLIP</term>, and creating a long-term accessible interface that will allow provenance researchers to use the method. Beyond technical aspects, the paper addressed the implications of machine learning-based retrieval for the concept of similarity, especially focusing on its quantification and the process of abstraction. In the context of this paper, similarity is based on visual criteria inherent in the image. Our proposed method enables us to explore different similarity dimensions by selecting a suitable pre-training method. We elaborated on the fact that quantification relates to two different aspects, namely the process of translating an image into feature vectors and that similarity is computed which requires a quantification process in the first place. Thus, similarity becomes a measurable, comparable and objective numerical value. We also highlighted a connection to Cognitive Psychology, where psychological processes are modeled using a computer and essentially are represented as numbers.<note type="footnote"> Cf. <ref type="bibliography" target="#winkler_aehnlichkeit_2021">Winkler 2021</ref>, p. 97.</note> The paper also emphasized that similarity requires abstraction processes. 
Here, we referred to Hartmut Winkler, who also wrote that similarity is based on mechanisms of abstraction, where comparison and similarity separate things into aspects (features) which are similar and dissimilar.<note type="footnote"> Cf. <ref type="bibliography" target="#winkler_aehnlichkeit_2021">Winkler 2021</ref>, pp.&#160;257–258.</note> Accordingly, we understand the ›process of abstraction‹ as a process in which certain criteria of the input data are abstracted to determine if things are similar or not. In that sense the abstraction process equals the extraction of features from the data which describe and represent the (content of the) data and form the basis for an image-based search. Eventually, we also elaborated on the loss of information and potentially the loss of similarity criteria associated with feature extraction and asked what is guiding the criteria selection:<note type="footnote"> Cf. <ref type="bibliography" target="#winkler_aehnlichkeit_2021">Winkler 2021</ref>, p. 295.</note> For humans, this process is influenced by the context;<note type="footnote"> Cf. <ref type="bibliography" target="#winkler_aehnlichkeit_2021">Winkler 2021</ref>, p. 94; <ref type="bibliography" target="#kimmich_ungefaehre_2017">Kimmich 2017</ref>, p. 24.</note> for the machine, the selection process is determined by the computer scientist and the pre-training method selected for the retrieval process.</p>
            <p>In the context of provenance research, the question remains whether similar is enough. The <title>Provenance Research Manual</title> published by the German Lost Art Foundation states the following: <quote>[…] [A]ll information about the identity of the piece (artist signatures, hallmarks, different attributions, variants, replicas or copies and re-casts […]) should be documented […]</quote>
               <note type="footnote">
                  <ref type="bibliography" target="#glaf_provenance_2020">German Lost Art Foundation et&#160;al. 2020</ref>, p. 40.</note> and explanations must be given, if the identity is not clear.<note type="footnote"> Cf. <ref type="bibliography" target="#glaf_provenance_2020">German Lost Art Foundation et&#160;al. 2020</ref>, p. 81.</note> Thus, the clear identity of the work is crucial when establishing its provenance. Being ›similar‹ thus might not seem enough for provenance researchers. However, visually similar objects might offer valuable clues on which research direction to pursue. </p>
         </div>
      </body>
      <back>
         <div type="bibliography">
            <head>Bibliography</head>
            <listBibl>
               <bibl xml:id="abid_et_al_gradio_2019">Abubakar Abid&#160;/ Ali Abdalla&#160;/ Ali Abid&#160;/ Dawood Khan&#160;/ Abdulrahman Alfozan&#160;/ James Zou: Gradio: Hassle-Free Sharing and Testing of ML Models in the Wild. arXiv. 06.06.2019. DOI: <ref target="https://doi.org/10.48550/arXiv.1906.02569">10.48550/arXiv.1906.02569</ref>
               </bibl>
               <bibl xml:id="anderson_nature_1991">John R. Anderson: The Adaptive Nature of Human Categorization. In: Psychological Review 98 (1991), No. 3, pp.&#160;409–429. DOI: 10.1037/0033-295X.98.3.409</bibl>
               <bibl xml:id="baehr_sales_2013">Astrid Bähr: German Sales 1930–1945. Bibliographie der Auktionskataloge aus Deutschland, Österreich und der Schweiz. Edited by Joachim Brand&#160;/ Moritz Wullen. Berlin 2013. PDF. DOI: <ref target="https://doi.org/10.11588/artdok.00002251">10.11588/artdok.00002251</ref>
               </bibl>
               <bibl xml:id="bell_impett_ikonographie_2019">Peter Bell&#160;/ Leonardo Impett: Ikonographie und Interaktion. Computergestützte Analyse von Posen in Bildern der Heilsgeschichte. In: Das Mittelalter 24 (2019), No. 1, pp.&#160;31–53. DOI: 10.1515/mial-2019-0004</bibl>
               <bibl xml:id="bengamra_et_al_challenges_2021">Siwar Bengamra&#160;/ Olfa Mzoughi&#160;/ André Bigand&#160;/ Ezzeddine Zagrouba: New Challenges of Face Detection in Paintings Based on Deep Learning. In: Giovanni Maria Farinella&#160;/ Petia Radeva&#160;/ Jose Braz&#160;/ Kadi Bouatouch (eds.): Proceedings of the 16<hi rend="super">th</hi> International Joint Conference on Computer Vision, Imaging and Computer Graphics Theory and Applications (VISAPP, Online, 08.–10.02.2021), Vol. 4. Vienna 2021, pp.&#160;311–320. DOI: 10.5220/0010243703110320</bibl>
               <bibl xml:id="bengamra_et_al_survey_2024">Siwar Bengamra&#160;/ Olfa Mzoughi&#160;/ André Bigand&#160;/ Ezzeddine Zagrouba: A Comprehensive Survey on Object Detection in Visual Art: Taxonomy and Challenge. In: Multimedia Tools and Applications 83 (2024), No. 5, pp.&#160;14637–14670. DOI: 10.1007/s11042-023-15968-9</bibl>
               <bibl xml:id="bernasconi_et_al_2023">Valentine Bernasconi&#160;/ Eva Cetinic&#160;/ Leonardo Impett: A Computational Approach to Hand Pose Recognition in Early Modern Paintings. In: Journal of Imaging 9 (2023), No. 6. DOI: <ref target="https://doi.org/10.3390/jimaging9060120">10.3390/jimaging9060120</ref>
               </bibl>
               <bibl xml:id="bommert_sales_2019">Britta Bommert: German Sales 1901–1929. Bibliographie der Auktionskataloge aus Deutschland, Österreich und der Schweiz. Edited by Joachim Brand. Berlin 2019. DOI: <ref target="https://doi.org/10.11588/artdok.00006565">10.11588/artdok.00006565</ref>
               </bibl>
               <bibl xml:id="carion_et_al_endtoend_2020">Nicolas Carion&#160;/ Francisco Massa&#160;/ Gabriel Synnaeve&#160;/ Nicolas Usunier&#160;/ Alexander Kirillov&#160;/ Sergey Zagoruyko: End-to-End Object Detection with Transformers. In: Andrea Vedaldi&#160;/ Horst Bischof&#160;/ Thomas Brox&#160;/ Jan-Michael Frahm (eds.): Computer Vision&#160;– ECCV 2020. Conference Papers. Part I (Online, 23.–28.08.2020). Cham, CH 2020, pp.&#160;213–229. DOI: 10.1007/978-3-030-58452-8_13</bibl>
               <bibl xml:id="castellano_vessio_overview_2021">Giovanna Castellano&#160;/ Gennaro Vessio: A Brief Overview of Deep Learning Approaches to Pattern Extraction and Recognition in Paintings and Drawings. In: Alberto Del Bimbo&#160;/ Rita Cucchiara&#160;/ Stan Sclaroff&#160;/ Giovanni Maria Farinella&#160;/ Tao Mei&#160;/ Marco Bertini&#160;/ Hugo Jair Escalante&#160;/ Roberto Vezzani (eds.): Pattern Recognition. ICPR International Workshops and Challenges. Proceedings (ICPR 2021, Online, 10.–15.01.2021). Cham, CH 2021, pp.&#160;487–501. DOI: 10.1007/978-3-030-68796-0_35 </bibl>
               <bibl xml:id="castellano_et_al_link_2021">Giovanna Castellano&#160;/ Eufemia Lella&#160;/ Gennaro Vessio: Visual Link Retrieval and Knowledge Discovery in Painting Datasets. In: Multimedia Tools and Applications 80 (2021), pp.&#160;6599–6616. DOI: <ref target="https://doi.org/10.1007/s11042-020-09995-z">10.1007/s11042-020-09995-z</ref>
               </bibl>
               <bibl xml:id="cetinic_she_understanding_2022">Eva Cetinic&#160;/ James She: Understanding and Creating Art with AI: Review and Outlook. In: ACM Transactions on Multimedia Computing, Communications, and Applications (TOMM) 18 (2022), No. 2, pp.&#160;1–22. DOI: 10.1145/3475799</bibl>
               <bibl xml:id="chen_et_al_learning_2022">Wei Chen&#160;/ Yu Liu&#160;/ Weiping Wang&#160;/ Erwin M. Bakker&#160;/ Theodoros Georgiou&#160;/ Paul Fieguth&#160;/ Li Liu&#160;/ Michael S. Lew: Deep Learning for Instance Retrieval. A Survey. In: IEEE Transactions on Pattern Analysis and Machine Intelligence 45 (2022), No. 6, pp.&#160;7270–7292. DOI: 10.1109/TPAMI.2022.3218591</bibl>
               <bibl xml:id="coic_2024">Common Objects in Context. Last accessed: 30.07.2024. HTML. [<ref target="https://cocodataset.org/#detection-eval">online</ref>]</bibl>
               <bibl xml:id="crowley_zisserman_state_2014">Elliot J. Crowley&#160;/ Andrew Zisserman: The State of the Art. Object Retrieval in Paintings using Discriminative Regions. In: Michel Valstar&#160;/ Andrew French&#160;/ Tony Pridmore (eds.): Proceedings of the British Machine Vision Conference 2014 (BMVC 2014, Nottingham, UK, 01.–05.09.2014). Nottingham, UK 2014. PDF. DOI: <ref target="http://dx.doi.org/10.5244/C.28.38">10.5244/C.28.38</ref></bibl>
               <bibl xml:id="crowley_zisserman_search_2015">Elliot J. Crowley&#160;/ Andrew Zisserman: In Search of Art. In: Lourdes Agapito&#160;/ Michael M. Bronstein&#160;/ Carsten Rother (eds.): Computer Vision&#160;– ECCV 2014 Workshops. Proceedings. Part I (Zurich, 06.–12.09.2014). Cham, CH etc. 2015, pp.&#160;54–70. DOI: 10.1007/978-3-319-16178-5_4 </bibl>
               <bibl xml:id="crowley_zisserman_art_2016">Elliot J. Crowley&#160;/ Andrew Zisserman: The Art of Detection. In: Gang Hua&#160;/ Hervé Jégou (eds.): Computer Vision&#160;– ECCV 2016 Workshops. Proceedings. Part I (Amsterdam, 08.–10.10.2016 and 15.–16.10.2016). Cham, CH 2016, pp.&#160;721–737. PDF. DOI: <ref target="https://doi.org/10.1007/978-3-319-46604-0_50">10.1007/978-3-319-46604-0_50</ref>
               </bibl>
               <bibl xml:id="csurka_et_al_categorization_2004">Gabriella Csurka&#160;/ Christopher R. Dance&#160;/ Lixin Fan&#160;/ Jutta Willamowski&#160;/ Cédric Bray: Visual Categorization with Bags of Keypoints. In: Workshop on Statistical Learning in Computer Vision. Proceedings (ECCV 2004, Prague, 15.05.2004). PDF. [<ref target="https://people.eecs.berkeley.edu/~efros/courses/AP06/Papers/csurka-eccv-04.pdf">online</ref>]</bibl>
               <bibl xml:id="deng_et_al_imagenet_2009">Jia Deng&#160;/ Wei Dong&#160;/ Richard Socher&#160;/ Li-Jia Li&#160;/ Kai Li&#160;/ Li Fei-Fei: ImageNet. A Large-Scale Hierarchical Image Database. In: 2009 IEEE Conference on Computer Vision and Pattern Recognition. Proceedings (Miami, 20.–25.06.2009). Miami 2009, pp.&#160;248–255. PDF. DOI: 10.1109/CVPR.2009.5206848</bibl>
               <bibl xml:id="dudenredaktion_quantifizierung_2024">Dudenredaktion (ed.): Quantifizierung. In: Duden online. Last accessed: 25.07.2024. HTML. [<ref target="https://www.duden.de/node/116936/revision/1232498">online</ref>] </bibl>
               <bibl xml:id="everingham_et_al_pascal_2010">Mark Everingham&#160;/ Luc Van Gool&#160;/ Christopher K. I. Williams&#160;/ John Winn&#160;/ Andrew Zisserman: The PASCAL Visual Object Classes (VOC) Challenge. In: International Journal of Computer Vision (2010), No. 88, pp.&#160;308–338. DOI: 10.1007/s11263-009-0275-4 </bibl>
               <bibl xml:id="fletcher_et_al_mapping_2012">Pamela Fletcher&#160;/ Anne Helmreich&#160;/ David Israel&#160;/ Seth Erickson: Local&#160;/ Global: Mapping Nineteenth-Century London’s Art Market. In: Nineteenth-Century Art Worldwide 11 (2012), No. 3. HTML. [<ref target="https://www.19thc-artworldwide.org/index.php/autumn12/fletcher-helmreich-mapping-the-london-art-market">online</ref>]</bibl>
               <bibl xml:id="foka_computer_2021">Amalia Foka: Computer Vision Applications for Art History: Reflections and Paradigms for Future Research. In: Proceedings of EVA London 2021. AI and the Arts: Artificial Imagination (EVA 2021, London, 05.–09.07.2021). London 2021, pp.&#160;73–80. PDF. DOI: <ref target="https://doi.org/10.14236/ewic/EVA2021.12">10.14236/ewic/EVA2021.12</ref>
               </bibl>
               <bibl xml:id="gaier_et_al_similtudo_2012">Martin Gaier&#160;/ Jeanette Kohl&#160;/ Alberto Saviello: Similitudo. Konzepte der Ähnlichkeit in Mittelalter und Früher Neuzeit. Paderborn 2012. <ptr type="gbv" cRef="71600643X"/>
               </bibl>
               <bibl xml:id="garcia_vogiatzis_paintings_2018">Noah Garcia&#160;/ George Vogiatzis: How to Read Paintings: Semantic Art Understanding with Multi-Modal Retrieval. arXiv. 23.10.2018. PDF. DOI: <ref target="https://doi.org/10.48550/arXiv.1810.09617">10.48550/arXiv.1810.09617</ref></bibl>
               <bibl xml:id="vicente-garcia_ersten_2021">Raul Vicente-Garcia: Zum Ersten, zum Zweiten, zum Dritten&#160;– gefunden! In: FUTUR (2021), No. 2. Last accessed: 30.07.2024. HTML. [<ref target="https://www.ipk.fraunhofer.de/de/medien/futur/futur-2021-2/zum-ersten-zum-zweiten-zum-dritten-gefunden.html">online</ref>] </bibl>               
               <bibl xml:id="gatys_et_al_algorithm_2015">Leon A. Gatys&#160;/ Alexander S. Ecker&#160;/ Matthias Bethge: A Neural Algorithm of Artistic Style. arXiv. 26.08.2015. Version 2 from 02.09.2015. DOI: <ref target="https://doi.org/10.48550/arXiv.1508.06576">10.48550/arXiv.1508.06576</ref>
               </bibl>
               <bibl xml:id="gatys_et_al_image_2016">Leon A. Gatys&#160;/ Alexander S. Ecker&#160;/ Matthias Bethge: Image Style Transfer using Convolutional Neural Networks. In: 29th IEEE Conference on Computer Vision and Pattern Recognition. CVPR 2016. Proceedings (Las Vegas, 27.–30.06.2016). Los Alamitos, US-CA etc. 2016, pp.&#160;2414–2423. PDF. [<ref target="https://openaccess.thecvf.com/content_cvpr_2016/papers/Gatys_Image_Style_Transfer_CVPR_2016_paper.pdf">online</ref>]</bibl>
               <bibl xml:id="geirhos_et_al_2018">Robert Geirhos&#160;/ Patricia Rubisch&#160;/ Claudio Michaelis&#160;/ Matthias Bethge&#160;/ Felix A. Wichmann&#160;/ Wieland Brendel: ImageNet-Trained CNNs are Biased towards Texture. Increasing Shape Bias Improves Accuracy and Robustness. OpenReview.net. 21.12.2018. Last modified: 08.02.2026. PDF&#160;/ HTML. [<ref target="https://openreview.net/forum?id=Bygh9j09KX&amp;trk=public_post_comment-text">online</ref>]</bibl>
               <bibl xml:id="glaf_provenance_2020">German Lost Art Foundation&#160;/ Arbeitskreis Provenienzforschung e.V.&#160;/ Arbeitskreis Provenienzforschung und Restitution&#160;– Bibliotheken&#160;/ Deutscher Bibliotheksverband e.V.&#160;/ Deutscher Museumsbund e.V.&#160;/ ICOM Deutschland e.V. (eds.): Provenance Resource Manual. To Identify Cultural Property Seized due to Persecution during the National Socialist Era. 2020. PDF. [<ref target="https://kulturgutverluste.de/sites/default/files/2023-06/Manual.pdf">online</ref>]</bibl>
               <bibl xml:id="germansales_2025">German Sales. Last accessed: 03.09.2025. HTML. DOI: <ref target="https://doi.org/10.11588/portal.gs">10.11588/portal.gs</ref>
               </bibl>
               <bibl xml:id="germansales_project_2024">German Sales: Project description. Last accessed: 24.07.2024. HTML. [<ref target="https://digi.ub.uni-heidelberg.de/en/germansales//ueberuns/beschreibung.html">online</ref>]</bibl>
               <bibl xml:id="girshick_et_al_hierarchies_2014">Ross Girshick&#160;/ Jeff Donahue&#160;/ Trevor Darrell&#160;/ Jitendra Malik: Rich Feature Hierarchies for Accurate Object Detection and Semantic Segmentation. In: 2014 IEEE Conference on Computer Vision and Pattern Recognition. CVPR 2014. Proceedings (Columbus, US-OH, 23.–28.06.2014). Los Alamitos, US-CA etc. 2014, pp.&#160;580–587. PDF. [<ref target="https://openaccess.thecvf.com/content_cvpr_2014/papers/Girshick_Rich_Feature_Hierarchies_2014_CVPR_paper.pdf">online</ref>]</bibl>
               <bibl xml:id="girshick_rcnn_2015">Ross Girshick: Fast R-CNN. In: 2015 IEEE International Conference on Computer Vision. ICCV 2015. Proceedings (Santiago, CL, 11.–18.12.2015). Los Alamitos, US-CA etc. 2015, pp.&#160;1440–1448. PDF. [<ref target="https://openaccess.thecvf.com/content_iccv_2015/papers/Girshick_Fast_R-CNN_ICCV_2015_paper.pdf">online</ref>]</bibl>
               <bibl xml:id="gonthier_et_al_object_2018">Nicolas Gonthier&#160;/ Yann Gousseau&#160;/ Saïd Ladjal&#160;/ Olivier Bonfait: Weakly Supervised Object Detection in Artworks. arXiv. 05.10.2018. PDF. DOI: <ref target="https://doi.org/10.48550/arXiv.1810.02569">10.48550/arXiv.1810.02569</ref></bibl>
               <bibl xml:id="gonthier_et_al_instance_2022">Nicolas Gonthier&#160;/ Saïd Ladjal&#160;/ Yann Gousseau: Multiple Instance Learning on Deep Features for Weakly Supervised Object Detection with Extreme Domain Shifts. In: Computer Vision and Image Understanding 214 (2022), 103299. DOI: 10.1016/j.cviu.2021.103299</bibl>
               <bibl xml:id="haffner_provenienzforschung_2019">Dorothee Haffner: Provenienzforschung digital vernetzt. Ergebnisse sichtbar machen. In: Museumskunde 84 (2019), pp.&#160;90–97. Last accessed: 29.07.2024. PDF. [<ref target="https://www.museumsbund.de/wp-content/uploads/2022/07/museumskunde-2019-1-online.pdf">online</ref>]</bibl>
               <bibl xml:id="haffner_provenienzen_2020">Dorothee Haffner: Provenienzen in Sammlungsdatenbanken. Digitale und virtuelle Chancen für die Vermittlung. In: Deutsches Zentrum Kulturgutverluste (ed.): Digitale Provenienzforschung (=Provenienz &amp; Forschung, 1). Dresden 2020, pp.&#160;36–42. <ptr type="gbv" cRef="1699332177"/>
               </bibl>
               <bibl xml:id="he_et_al_product_2012">Junfeng He&#160;/ Jinyuan Feng&#160;/ Xianglong Liu&#160;/ Tao Cheng&#160;/ Tai-Hsu Lin&#160;/ Hyunjin Chung&#160;/ Shih-Fu Chang: Mobile Product Search with Bag of Hash Bits and Boundary Reranking. In: 2012 IEEE Conference on Computer Vision and Pattern Recognition. CVPR 2012. Proceedings (Providence, US-RI, 16.–21.06.2012). Los Alamitos, US-CA etc. 2012, pp.&#160;3005–3012. DOI: 10.1109/CVPR.2012.6248030</bibl>
               <bibl xml:id="he_et_al_learning_2016">Kaiming He&#160;/ Xiangyu Zhang&#160;/ Shaoqing Ren&#160;/ Jian Sun: Deep Residual Learning for Image Recognition. In: 29th IEEE Conference on Computer Vision and Pattern Recognition. CVPR 2016. Proceedings (Las Vegas, 27.–30.06.2016). Los Alamitos, US-CA etc. 2016, pp.&#160;770–778. PDF. [<ref target="https://openaccess.thecvf.com/content_cvpr_2016/papers/He_Deep_Residual_Learning_CVPR_2016_paper.pdf">online</ref>]</bibl>
               <bibl xml:id="hoffmann_kuhn_kunsthaendler_2016">Meike Hoffmann&#160;/ Nicola Kuhn: Hitlers Kunsthändler: Hildebrand Gurlitt 1895–1956. Munich 2016. <ptr type="gbv" cRef="840039794"/>
               </bibl>
               <bibl xml:id="hopp_kunsthandel_2012">Meike Hopp: Kunsthandel im Nationalsozialismus: Adolf Weinmüller in München und Wien. Cologne etc. 2012. <ptr type="gbv" cRef="659255235"/>
               </bibl>
               <bibl xml:id="hopp_provenienzrecherche_2018">Meike Hopp: Provenienzrecherche und digitale Forschungsinfrastrukturen in Deutschland: Bedürfnisse, Desiderate, Tendenzen. In: Eva Blimlinger&#160;/ Heinz Schödl (eds.): …(k)ein Ende in Sicht. 20 Jahre Kunstrückgabegesetz in Österreich (=&#160;Schriftenreihe der Kommission für Provenienzforschung, 8). Vienna 2018, pp.&#160;35–59. DOI: <ref target="https://doi.org/10.7767/9783205201274.37">10.7767/9783205201274.37</ref>
               </bibl>
               <bibl xml:id="huemer_sales_2014">Christian Huemer: The »German Sales 1930–1945« Database Project. In: Collections 10 (2014), No. 3, pp.&#160;273–278. DOI: 10.1177/155019061401000306</bibl>
               <bibl xml:id="impett_suesstrunk_pose_2016">Leonardo Impett&#160;/ Sabine Süsstrunk: Pose and Pathosformel in Aby Warburg’s Bilderatlas. In: Gang Hua&#160;/ Hervé Jégou (eds.): Computer Vision&#160;– ECCV 2016 Workshops. Proceedings. Part I (Amsterdam, 08.–10.10.2016 and 15.–16.10.2016). Cham, CH 2016, pp.&#160;888–902. HTML&#160;/ PDF. DOI: <ref target="https://doi.org/10.1007/978-3-319-46604-0_61">10.1007/978-3-319-46604-0_61</ref>
               </bibl>
               <bibl xml:id="impett_moretti_totentanz_2017">Leonardo Impett&#160;/ Franco Moretti: Totentanz. Operationalizing Aby Warburg’s Pathosformeln. In: New Left Review (2017), No. 107, pp.&#160;68–97. <ptr type="gbv" cRef="341341037"/>
               </bibl>
               <bibl xml:id="impett_gesture_2020">Leonardo Impett: Analyzing Gesture in Digital Art History. In: Kathryn Brown (ed.): The Routledge Companion to Digital Humanities and Art History. London etc. 2020, pp.&#160;386–407. <ptr type="gbv" cRef="1685846653"/>
               </bibl>
               <bibl xml:id="jeon_et_al_object_2020">Hyeong-Ju Jeon&#160;/ Soonchul Jung&#160;/ Yoon-Seok Choi&#160;/ Jae Woo Kim&#160;/ Jin Seo Kim: Object Detection in Artworks Using Data Augmentation. In: ICTC 2020. The 11<hi rend="super">th</hi> International Conference on Information and Communication Technology Convergence. Data, Network, and AI in the Age of ›Untact‹ (Jeju, KR, 21.–23.10.2020). Jeju, KR 2020, pp.&#160;1312–1314. DOI: 10.1109/ICTC49870.2020.9289321</bibl>
               <bibl xml:id="jeuthe_kunstwerte_2014">Gesa Jeuthe: Kunstwerte im Wandel: Die Preisentwicklung der deutschen Moderne im nationalen und internationalen Kunstmarkt 1925 bis 1955. Vol. 7. Berlin 2014. <ptr type="gbv" cRef="64179276X"/>
               </bibl>
               <bibl xml:id="jocher_et_al_ultralytics_2023">Glenn Jocher&#160;/ Ayush Chaurasia&#160;/ Jing Qiu: Ultralytics YOLO. GitHub. 10.01.2023. Version 8.4.53 from 22.05.2026. [<ref target="https://github.com/ultralytics/ultralytics">online</ref>]</bibl>
               <bibl xml:id="johnson_et_al_similarity_2019">Jeff Johnson&#160;/ Matthijs Douze&#160;/ Hervé Jégou: Billion-Scale Similarity Search with GPUs. In: IEEE Transactions on Big Data 7 (2019), No. 3, pp.&#160;535–547. DOI: 10.1109/TBDATA.2019.2921572</bibl>
               <bibl xml:id="ju_et_al_humanart_2023">Xuan Ju&#160;/ Ailing Zeng&#160;/ Jianan Wang&#160;/ Qiang Xu&#160;/ Lei Zhang: Human-Art. A Versatile Human-Centric Dataset Bridging Natural and Artificial Scenes. In: 2023 IEEE/CVF Conference on Computer Vision and Pattern Recognition. CVPR 2023. Proceedings (Vancouver, 17.–24.06.2023). Los Alamitos, US-CA etc. 2023, pp.&#160;618–629. PDF. [<ref target="https://openaccess.thecvf.com/content/CVPR2023/papers/Ju_Human-Art_A_Versatile_Human-Centric_Dataset_Bridging_Natural_and_Artificial_Scenes_CVPR_2023_paper.pdf">online</ref>]</bibl>
               <bibl xml:id="kadish_et_al_improving_2021">David Kadish&#160;/ Sebastian Risi&#160;/ Anders Sundnes Løvlie: Improving Object Detection in Art Images Using Only Style Transfer. In: 2021 International Joint Conference on Neural Networks. Proceedings (IJCNN, Online, 18.–22.07.2021). Shenzhen, CHN 2021. PDF. DOI: 10.1109/IJCNN52387.2021.9534264</bibl>
               <bibl xml:id="kimmich_ungefaehre_2017">Dorothee Kimmich: Ins Ungefähre: Ähnlichkeit und Moderne. Constance 2017. <ptr type="gbv" cRef="877672733"/>
               </bibl>
               <bibl xml:id="kettererkunst_auction_2018a">Ketterer Kunst (2018a). Auction on May 18, 2018, in Munich. Lot 15, Heinrich Bürkel »Der Kochelsee mit den Häusern von Schlehdorf«. [<ref target="https://www.kettererkunst.de/kunst/kd/details.php?obnr=118000191&amp;anummer=465">online</ref>]</bibl>
               <bibl xml:id="kettererkunst_auction_2018b">Ketterer Kunst (2018b). Auction on November 23, 2018, in Munich. Lot 28, Adrian Ludwig Richter »Hirten am Feuer (Abendlandschaft)«. [<ref target="https://www.kettererkunst.de/kunst/kd/details.php?obnr=117002167&amp;anummer=474">online</ref>]</bibl>
               <bibl xml:id="kettererkunst_auction_2019">Ketterer Kunst (2019). Auction on May 24, 2019, in Munich. Lot 57, Ludwig von Zumbusch »Einsames Land«. [<ref target="https://www.kettererkunst.de/kunst/kd/details.php?obnr=119000037&amp;anummer=484">online</ref>] </bibl>
               <bibl xml:id="lang_provenienzforschung_2023a">Sabine Lang (2023a): Wie hat sich Provenienzforschung durch Digitalität verändert? In: Sebastian Finsterwalder (ed.): RETOUR. Freier Blog für Provenienzforschende. 07.08.2023. HTML. [<ref target="https://retour.hypotheses.org/2916">online</ref>] </bibl>
               <bibl xml:id="lang_gap_2023b">Sabine Lang (2023b): »Mind the Gap«: Von Lücken in der Provenienzforschung und ihrer Präsenz im digitalen Raum. In: Peer Trilcke&#160;/ Anna Busch&#160;/ Patrick Helling (eds.): DHd 2023. Open Humanities, Open Culture. 9. Jahrestagung des Verbands Digital Humanities im deutschsprachigen Raum. Conference Abstracts. (Trier, Luxembourg, 13.–17.03.2023). Trier etc. 2023, pp.&#160;212–217. PDF. DOI: <ref target="https://doi.org/10.5281/zenodo.7715420">10.5281/zenodo.7715420</ref>
               </bibl>
               <bibl xml:id="lin_et_al_microsoft_2014">Tsung-Yi Lin&#160;/ Michael Maire&#160;/ Serge Belongie&#160;/ Lubomir Bourdev&#160;/ Ross Girshick&#160;/ James Hays&#160;/ Pietro Perona&#160;/ Deva Ramanan&#160;/ C. Lawrence Zitnick&#160;/ Piotr Dollár: Microsoft COCO. Common Objects In Context. In: David Fleet&#160;/ Tomas Pajdla&#160;/ Bernt Schiele&#160;/ Tinne Tuytelaars (eds.): Computer Vision&#160;– ECCV 2014. Proceedings. Part V (Zurich, 06.–12.09.2014). Cham, CH 2014, pp.&#160;740–755. PDF. DOI: <ref target="https://doi.org/10.1007/978-3-319-10602-1_48">10.1007/978-3-319-10602-1_48</ref>
               </bibl>
               <bibl xml:id="liu_et_al_ssd_2016">Wei Liu&#160;/ Dragomir Anguelov&#160;/ Dumitru Erhan&#160;/ Christian Szegedy&#160;/ Scott Reed&#160;/ Cheng-Yang Fu&#160;/ Alexander C. Berg: SSD. Single Shot MultiBox Detector. In: Computer Vision&#160;– ECCV 2016. Proceedings. Part I (Amsterdam, 11.–14.10.2016). Cham, CH 2016, pp.&#160;21–37. DOI: <ref target="https://doi.org/10.1007/978-3-319-46448-0_2">10.1007/978-3-319-46448-0_2</ref>
               </bibl>
               <bibl xml:id="lowe_image_2004">David G. Lowe: Distinctive Image Features from Scale-Invariant Keypoints. In: International Journal of Computer Vision 60 (2004), pp.&#160;91–110. DOI: 10.1023/B:VISI.0000029664.99615.94 </bibl>
               <bibl xml:id="lu_et_al_image_2021">Yue Lu&#160;/ Chao Guo&#160;/ Xingyuan Dai&#160;/ Fei-Yue Wang: Image Captioning on Fine Art Paintings via Virtual Paintings. In: IEEE 1<hi rend="super">st</hi> International Conference on Digital Twins and Parallel Intelligence. DTPI 2021. Proceedings (Beijing, CHN, 15.07.–15.08.2021). Los Alamitos, US-CA etc. 2021, pp.&#160;156–159. DOI: 10.1109/DTPI52967.2021.9540081</bibl>
               <bibl xml:id="madhu_et_al_characters_2019">Prathmesh Madhu&#160;/ Ronak Kosti&#160;/ Lara Mührenberg&#160;/ Peter Bell&#160;/ Andreas Maier&#160;/ Vincent Christlein: Recognizing Characters in Art History Using Deep Learning. In: Valerie Gouet-Brunet&#160;/ Margarita Khokhlova&#160;/ Liming Chen (eds.): SUMAC ’19. Proceedings of the 1<hi rend="super">st</hi> Workshop on Structuring and Understanding of Multimedia Heritage Contents (Nice, FR, 21.10.2019). New York 2019, pp.&#160;15–22. DOI: <ref target="https://doi.org/10.1145/3347317.3357242">10.1145/3347317.3357242</ref></bibl>
               <bibl xml:id="madhu_et_al_structures_2020">Prathmesh Madhu&#160;/ Tilman Marquart&#160;/ Ronak Kosti&#160;/ Peter Bell&#160;/ Vincent Christlein: Understanding Compositional Structures in Art Historical Images Using Pose and Gaze Priors. Towards Scene Understanding in Art History. In: Adrien Bartoli&#160;/ Andrea Fusiello (eds.): Computer Vision&#160;– ECCV 2020 Workshops. Proceedings. Part II (Online, 23.–28.08.2020). Cham, CH 2020, pp.&#160;109–125. DOI: 10.1007/978-3-030-66096-3_9</bibl>
               <bibl xml:id="madhu_et_al_icc_2023">Prathmesh Madhu&#160;/ Tilman Marquart&#160;/ Ronak Kosti&#160;/ Dirk Suckow&#160;/ Peter Bell&#160;/ Andreas Maier&#160;/ Vincent Christlein: ICC++. Explainable Feature Learning for Art History using Image Compositions. In: Pattern Recognition 136 (2023), p. 109153. DOI: 10.1016/j.patcog.2022.109153</bibl>
               <bibl xml:id="mariani_paalen_2022">Fabio Mariani: »Probably Sold to Paalen, Possibly by Exchange«: Vagueness, Incompleteness, Subjectivity and Uncertainty in Digital Art Provenance. In: Workshop on Computational Methods in the Humanities 2022 (COMHUM 2022, Lausanne, 09.–10.06.2022). Lausanne 2022. PDF. [<ref target="https://wp.unil.ch/llist/files/2022/06/COMHUM_2022_paper_5.pdf">online</ref>]</bibl>
               <bibl xml:id="marinescu_et_al_object_2020">Maria-Cristina Marinescu&#160;/ Artem Reshetnikov&#160;/ Joaquim Moré López: Improving Object Detection in Paintings Based on Time Contexts. In: Alfredo Cuzzocrea&#160;/ Carlo Zaniolo (eds.): 2020 International Conference on Data Mining Workshops. Proceedings (ICDMW, Sorrento, IT, 17.–20.11.2020). Sorrento, IT 2020, pp.&#160;926–932. PDF. DOI: 10.1109/ICDMW51313.2020.00133 </bibl>
               <bibl xml:id="mermet_et_al_face_2020">Alexis Mermet&#160;/ Asanobu Kitamoto&#160;/ Chikahiko Suzuki&#160;/ Akira Takagishi: Face Detection on Pre-modern Japanese Artworks using R-CNN and Image Patching for Semi-Automatic Annotation. In: Valerie Gouet-Brunet&#160;/ Margarita Khokhlova&#160;/ Ronak Kosti&#160;/ Liming Chen&#160;/ Xu-Cheng Yin (eds.): SUMAC’20. Proceedings of the 2<hi rend="super">nd</hi> Workshop on Structuring and Understanding of Multimedia Heritage Contents (Seattle, 12.10.2020). New York 2020, pp.&#160;23–31. DOI: <ref target="https://doi.org/10.1145/3423323.3423412">10.1145/3423323.3423412</ref>
               </bibl>
               <bibl xml:id="musgrave_et_al_learning_2020">Kevin Musgrave&#160;/ Serge Belongie&#160;/ Ser-Nam Lim: A Metric Learning Reality Check. In: Andrea Vedaldi&#160;/ Horst Bischof&#160;/ Thomas Brox&#160;/ Jan-Michael Frahm (eds.): Computer Vision&#160;– ECCV 2020. Proceedings. Part XXV (Online, 23.–28.08.2020). Cham, CH 2020, pp.&#160;681–699. PDF. DOI: 10.1007/978-3-030-58595-2_41</bibl>
               <bibl xml:id="neumeister_2024">Neumeister. Last accessed: 24.07.2024. HTML. [<ref target="https://www.neumeister.com/kunstwerksuche/kunstdatenbank/ergebnis/193-298/johann-sperl-sommerlust/">online</ref>] </bibl>
               <bibl xml:id="nister_stewenius_recognition_2006">David Nistér&#160;/ Henrik Stewénius: Scalable Recognition with a Vocabulary Tree. In: Dan Huttenlocher&#160;/ David Forsyth (eds.): 2006 IEEE Computer Society Conference on Computer Vision and Pattern Recognition. CVPR 2006. Proceedings. Volume 2 (New York, 17.–22.06.2006). Los Alamitos, US-CA etc. 2006, pp.&#160;2161–2168. PDF. DOI: 10.1109/CVPR.2006.264</bibl>
               <bibl xml:id="offert_bell_imgsai_2024">Fabian Offert&#160;/ Peter Bell: IMGS.AI. A Multimodal Search Engine for Digital Art History. In: International Journal for Digital Art History 9 (2024), pp.&#160;5.28–5.39. DOI: <ref target="https://doi.org/10.11588/dahj.2023.9.91295">10.11588/dahj.2023.9.91295</ref>
               </bibl>
               <bibl xml:id="openmmlab_pretraining_2024a">OpenMMLab Pre-training Toolbox and Benchmark. Last accessed: 30.07.2024. HTML. [<ref target="https://github.com/open-mmlab/mmpretrain">online</ref>]</bibl>
               <bibl xml:id="openmmlab_pose_2024b">OpenMMLab Pose Estimation Toolbox and Benchmark. Last accessed: 30.07.2024. [<ref target="https://github.com/open-mmlab/mmpose">online</ref>]</bibl>
               <bibl xml:id="patoliya_et_al_smell_2024">Vishal Patoliya&#160;/ Mathias Zinnen&#160;/ Andreas Maier&#160;/ Vincent Christlein: Smell and Emotion. Recognising Emotions in Smell-Related Artworks. arXiv. 05.07.2024. DOI: <ref target="https://doi.org/10.48550/arXiv.2407.04592">10.48550/arXiv.2407.04592</ref>
               </bibl>
               <bibl xml:id="philbin_et_al_quantization_2008">James Philbin&#160;/ Ondřej Chum&#160;/ Michael Isard&#160;/ Josef Sivic&#160;/ Andrew Zisserman: Lost in Quantization. Improving Particular Object Retrieval in Large Scale Image Databases. In: Linda Shapiro&#160;/ Narendra Ahuja (eds.): 2008 IEEE Conference on Computer Vision and Pattern Recognition. Proceedings (CVPR, Anchorage, US-AK, 23.–28.06.2008). Los Alamitos, US-CA etc. 2008. PDF&#160;/ HTML. DOI: 10.1109/CVPR.2008.4587635</bibl>
               <bibl xml:id="portapp_2025">PortApp. 2025. Last accessed: 05.02.2026. HTML. [<ref target="https://www.hab.de/automatische-bilderkennung-fruehneuzeitlicher-portraetgrafik-als-app/">online</ref>]</bibl>
               <bibl xml:id="pymupdf_2024">PyMuPDF. Last accessed: 30.07.2024. HTML. [<ref target="https://github.com/pymupdf/PyMuPDF">online</ref>]</bibl>
               <bibl xml:id="radford_et_al_models_2021">Alec Radford&#160;/ Jong Wook Kim&#160;/ Chris Hallacy&#160;/ Aditya Ramesh&#160;/ Gabriel Goh&#160;/ Sandhini Agarwal&#160;/ Girish Sastry&#160;/ Amanda Askell&#160;/ Pamela Mishkin&#160;/ Jack Clark&#160;/ Gretchen Krueger&#160;/ Ilya Sutskever: Learning Transferable Visual Models from Natural Language Supervision. In: Marina Meila&#160;/ Tong Zhang (eds.): Proceedings of the 38<hi rend="super">th</hi> International Conference on Machine Learning (PMLR 139, Online, 18.–24.07.2021). Cambridge, US-MA 2021, pp.&#160;8748–8763. PDF. [<ref target="http://proceedings.mlr.press/v139/radford21a">online</ref>]</bibl>
               <bibl xml:id="redmon_et_al_you_2016">Joseph Redmon&#160;/ Santosh Divvala&#160;/ Ross Girshick&#160;/ Ali Farhadi: You Only Look Once. Unified, Real-Time Object Detection. In: Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition. 2016, pp.&#160;779–788. PDF. [<ref target="https://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Redmon_You_Only_Look_CVPR_2016_paper.pdf">online</ref>] </bibl>
               <bibl xml:id="ren_et_al_rcnn_2016">Shaoqing Ren&#160;/ Kaiming He&#160;/ Ross Girshick&#160;/ Jian Sun: Faster R-CNN. Towards Real-Time Object Detection with Region Proposal Networks. In: IEEE Transactions on Pattern Analysis and Machine Intelligence 39 (2016), No. 6, pp.&#160;1137–1149. DOI: 10.1109/TPAMI.2016.2577031 </bibl>
               <bibl xml:id="reshetnikov_et_al_deart_2022">Artem Reshetnikov&#160;/ Maria-Cristina Marinescu&#160;/ Joaquim Moré López: DEArt. Dataset of European Art. In: Leonid Karlinsky&#160;/ Tomer Michaeli&#160;/ Ko Nishino (eds.): Computer Vision&#160;– ECCV 2022 Workshops. Proceedings. Part I (Tel Aviv, 23.–27.10.2022). Cham, CH 2022, pp.&#160;218–233. PDF. DOI: 10.1007/978-3-031-25056-9_15 </bibl>
               <bibl xml:id="ridnik_et_al_imagenet_2021">Tal Ridnik&#160;/ Emanuel Ben-Baruch&#160;/ Asaf Noy&#160;/ Lihi Zelnik-Manor: ImageNet-21K. Pretraining for the Masses. arXiv. 22.04.2021. Version 4 from 05.08.2021. PDF. DOI: <ref target="https://doi.org/10.48550/arXiv.2104.10972">10.48550/arXiv.2104.10972</ref></bibl>
               <bibl xml:id="rother_et_al_care_2022">Lynn Rother&#160;/ Fabio Mariani&#160;/ Max Koss: Taking Care of History: Toward a Politics of Provenance Linked Open Data in Museums. In: Emily Lew Fry&#160;/ Erin Canning (eds.): Perspectives on Data. Chicago 2022. HTML. DOI: <ref target="https://doi.org/10.53269/9780865593152/06">10.53269/9780865593152/06</ref>
               </bibl>
               <bibl xml:id="rother_et_al_value_2023">Lynn Rother&#160;/ Fabio Mariani&#160;/ Max Koss: Hidden Value: Provenance as a Source for Economic and Social History. In: Jahrbuch für Wirtschaftsgeschichte&#160;/ Economic History Yearbook 64 (2023), No. 1, pp.&#160;111–142. DOI: <ref target="https://doi.org/10.1515/jbwg-2023-0005">10.1515/jbwg-2023-0005</ref>
               </bibl>
               <bibl xml:id="russakovsky_et_al_imagenet_2015">Olga Russakovsky&#160;/ Jia Deng&#160;/ Hao Su&#160;/ Jonathan Krause&#160;/ Sanjeev Satheesh&#160;/ Sean Ma&#160;/ Zhiheng Huang&#160;/ Andrej Karpathy&#160;/ Aditya Khosla&#160;/ Michael Bernstein&#160;/ Alexander C. Berg&#160;/ Li Fei-Fei: ImageNet Large Scale Visual Recognition Challenge. In: International Journal of Computer Vision 115 (2015), No. 3, pp.&#160;211–252. PDF. DOI: 10.1007/s11263-015-0816-y </bibl>
               <bibl xml:id="sanakoyeu_et_al_content_2018">Artsiom Sanakoyeu&#160;/ Dmytro Kotovenko&#160;/ Sabine Lang&#160;/ Björn Ommer: A Style-Aware Content Loss for Real-Time HD Style Transfer. In: Vittorio Ferrari&#160;/ Martial Hebert&#160;/ Christian Sminchisescu&#160;/ Yair Weiss (eds.): Computer Vision&#160;– ECCV 2018. 15<hi rend="super">th</hi> European Conference. Proceedings. Part VIII (Munich, 08.–14.09.2018). Cham, CH 2018, pp.&#160;698–714. HTML&#160;/ PDF. DOI: <ref target="https://doi.org/10.1007/978-3-030-01237-3_43">10.1007/978-3-030-01237-3_43</ref></bibl>
               <bibl xml:id="sartini_gangemi_symbolism_2021">Bruno Sartini&#160;/ Aldo Gangemi: Towards the Unchaining of Symbolism from Knowledge Graphs. How Symbolic Relationships Can Link Cultures. In: Federico Boschetti&#160;/ Angelo Mario Del Grosso&#160;/ Enrica Salvatori (eds.): AIUCD 2021&#160;– DHs for Society: E-Quality, Participation, Rights and Values in the Digital Age. Book of Extended Abstracts of the 10<hi rend="super">th</hi> National Conference (Pisa, 19.–22.01.2021). Pisa 2021, pp.&#160;576–580. PDF. [<ref target="https://amsacta.unibo.it/id/eprint/6712/1/AIUCD2021_BOA-versione3A.pdf">online</ref>]</bibl>
               <bibl xml:id="sartini_et_al_icon_2023">Bruno Sartini&#160;/ Sofia Baroncini&#160;/ Marieke van Erp&#160;/ Francesca Tomasi&#160;/ Aldo Gangemi: ICON. An Ontology for Comprehensive Artistic Interpretations. In: ACM Journal on Computing and Cultural Heritage 16 (2023), No. 3, pp.&#160;1–38. DOI: <ref target="https://doi.org/10.1145/3594724">10.1145/3594724</ref>
               </bibl>
               <bibl xml:id="scheithauer_et_al_auction_2024">Hugo Scheithauer&#160;/ Sarah Bénière&#160;/ Laurent Romary: Automatic Retro-Structuration of Auction Sales Catalogs Layout and Content. HAL Open Science. 15.04.2024. HTML&#160;/ PDF. [<ref target="https://hal.science/hal-04547239/">online</ref>] </bibl>
               <bibl xml:id="schich_et_al_network_2017">Maximilian Schich&#160;/ Christian Huemer&#160;/ Piotr Adamczyk&#160;/ Lev Manovich&#160;/ Yang-Yu Liu: Network Dimensions in the Getty Provenance Index. arXiv. 09.06.2017. DOI: <ref target="https://doi.org/10.48550/arXiv.1706.02804">10.48550/arXiv.1706.02804</ref>
               </bibl>
               <bibl xml:id="seguin_et_al_link_2016">Benoit Seguin&#160;/ Carlotta Striolo&#160;/ Isabella diLenardo&#160;/ Frederic Kaplan: Visual Link Retrieval in a Database of Paintings. In: Gang Hua&#160;/ Hervé Jégou (eds.): Computer Vision&#160;– ECCV 2016 Workshops. Proceedings. Part I (Amsterdam, 08.–10.10.2016 and 15.–16.10.2016). Cham, CH 2016, pp.&#160;753–767. PDF. DOI: <ref target="https://doi.org/10.1007/978-3-319-46604-0_52">10.1007/978-3-319-46604-0_52</ref>
               </bibl>
               <bibl xml:id="shen_et_al_patterns_2019">Xi Shen&#160;/ Alexei A. Efros&#160;/ Mathieu Aubry: Discovering Visual Patterns in Art Collections with Spatially-Consistent Feature Learning. In: 2019 IEEE&#160;/ CVF Conference on Computer Vision and Pattern Recognition. CVPR 2019. Proceedings (Long Beach, US-CA, 16.–20.06.2019). Los Alamitos, US-CA etc. 2019, pp.&#160;9278–9287. PDF. [<ref target="https://openaccess.thecvf.com/content_CVPR_2019/papers/Shen_Discovering_Visual_Patterns_in_Art_Collections_With_Spatially-Consistent_Feature_Learning_CVPR_2019_paper.pdf">online</ref>]</bibl>
               <bibl xml:id="smirnov_eguizabal_learning_2018">Stanislav Smirnov&#160;/ Alma Eguizabal: Deep Learning for Object Detection in Fine-Art Paintings. In: 2018 Metrology for Archaeology and Cultural Heritage (MetroArchaeo, Cassino, IT, 22.–24.10.2018). Cassino, IT 2018, pp.&#160;45–49. PDF. DOI: 10.1109/MetroArchaeo43810.2018.9089828 </bibl>
               <bibl xml:id="springstein_et_al_iart_2021">Matthias Springstein&#160;/ Stefanie Schneider&#160;/ Javad Rahnama&#160;/ Eyke Hüllermeier&#160;/ Hubertus Kohle&#160;/ Ralph Ewerth: iART: A Search Engine for Art-Historical Images to Support Research in the Humanities. In: Heng Tao Shen&#160;/ Yueting Zhuang&#160;/ John R. Smith (eds.): MM ’21. Proceedings of the 29<hi rend="super">th</hi> ACM International Conference on Multimedia (Online, 20.–24.10.2021). New York 2021, pp.&#160;2801–2803. PDF. DOI: <ref target="https://doi.org/10.1145/3474085.3478564">10.1145/3474085.3478564</ref>
               </bibl>
               <bibl xml:id="springstein_et_al_pose_2022">Matthias Springstein&#160;/ Stefanie Schneider&#160;/ Christian Althaus&#160;/ Ralph Ewerth: Semi-supervised Human Pose Estimation in Art-historical Images. In: João Magalhães&#160;/ Alberto del Bimbo&#160;/ Shin'ichi Satoh&#160;/ Nicu Sebe (eds.): MM ’22. Proceedings of the 30<hi rend="super">th</hi> ACM International Conference on Multimedia (Lisbon, 10.–14.10.2022). New York 2022, pp.&#160;1107–1116. PDF. DOI: <ref target="https://doi.org/10.1145/3503161.3548371">10.1145/3503161.3548371</ref>
               </bibl>
               <bibl xml:id="sun_et_al_object_2015">Shaoyan Sun&#160;/ Wengang Zhou&#160;/ Qi Tian&#160;/ Houqiang Li: Scalable Object Retrieval with Compact Image Representation from Generic Object Regions. In: ACM Transactions on Multimedia Computing, Communications, and Applications 12 (2015), No. 2. PDF. DOI: <ref target="https://doi.org/10.1145/2818708">10.1145/2818708</ref>
               </bibl>
               <bibl xml:id="thyagharajan_kalaiarsi_review_2021">K. K. Thyagharajan&#160;/ G. Kalaiarasi: A Review on Near-Duplicate Detection of Images Using Computer Vision Techniques. In: Archives of Computational Methods in Engineering 28 (2021), No. 3, pp.&#160;897–916. PDF. DOI: 10.1007/s11831-020-09400-w </bibl>
               <bibl xml:id="tolias_et_al_object_2015">Giorgos Tolias&#160;/ Ronan Sicre&#160;/ Hervé Jégou: Particular Object Retrieval with Integral Max-Pooling of CNN Activations. arXiv. 18.11.2015. Version 2 from 24.02.2016. PDF. DOI: <ref target="https://doi.org/10.48550/arXiv.1511.05879">10.48550/arXiv.1511.05879</ref>
               </bibl>
               <bibl xml:id="ufer_et_al_retrieval_2021">Nikolai Ufer&#160;/ Max Simon&#160;/ Sabine Lang&#160;/ Björn Ommer: Large-Scale Interactive Retrieval in Art Collections Using Multi-Style Feature Aggregation. In: PloS One 16 (2021), No. 11. HTML&#160;/ PDF. DOI: <ref target="https://doi.org/10.1371/journal.pone.0259718">10.1371/journal.pone.0259718</ref>
               </bibl>
               <bibl xml:id="vicente-garcia_suche_2024">Raul Vicente-Garcia: Bildbasierte Suche in Auktionskatalogen. In: Museumskunde 89 (2024), No. 1 &amp; 2, pp.&#160;32–38. <ptr type="gbv" cRef="1005573603"/>
               </bibl>
               <bibl xml:id="werner_proveana_2020">Sabrina Werner: Proveana. Zur Entwicklung der Forschungsdatenbank des Deutschen Zentrums Kulturgutverluste. In: Deutsches Zentrum Kulturgutverluste (ed.): Digitale Provenienzforschung (=Provenienz &amp; Forschung, 1). Dresden 2020, pp.&#160;26–36. <ptr type="gbv" cRef="1699332177"/>
               </bibl>
               <bibl xml:id="westlake_et_al_people_2016">Nicholas Westlake&#160;/ Hongping Cai&#160;/ Peter Hall: Detecting People in Artwork with CNNs. In: Gang Hua&#160;/ Hervé Jégou (eds.): Computer Vision&#160;– ECCV 2016 Workshops. Proceedings. Part I (Amsterdam, 08.–10.10.2016 and 15.–16.10.2016). Cham, CH 2016, pp.&#160;825–841. PDF. DOI: <ref target="https://doi.org/10.1007/978-3-319-46604-0_57">10.1007/978-3-319-46604-0_57</ref>
               </bibl>
               <bibl xml:id="winkler_aehnlichkeit_2021">Hartmut Winkler: Ähnlichkeit. Berlin 2021. <ptr type="gbv" cRef="176482413X"/>
               </bibl>
               <bibl xml:id="yokoo_et_al_re-ranking_2020">Shuhei Yokoo&#160;/ Kohei Ozaki&#160;/ Edgar Simo-Serra&#160;/ Satoshi Iizuka: Two-stage Discriminative Re-Ranking for Large-Scale Landmark Retrieval. In: CVPRW 2020. 2020 IEEE&#160;/ CVF Conference on Computer Vision and Pattern Recognition Workshops. Proceedings (Online, 14.–19.06.2020). Los Alamitos, US-CA etc. 2020, pp.&#160;4363–4370. DOI: <ref target="https://doi.org/10.1109/CVPRW50498.2020.00514">10.1109/CVPRW50498.2020.00514</ref></bibl>
               <bibl xml:id="yelizaveta_et_al_analysis_2005">Marchenko Yelizaveta&#160;/ Chua Tat-Seng&#160;/ Aristarkhova Irina: Analysis and Retrieval of Paintings Using Artistic Color Concepts. In: 2005 IEEE International Conference on Multimedia and Expo. Proceedings (Amsterdam, 06.07.2005). Los Alamitos, US-CA etc. 2005, pp.&#160;1246–1249. PDF. DOI: <ref target="https://doi.org/10.1109/ICME.2005.1521654">10.1109/ICME.2005.1521654</ref></bibl>
               <bibl xml:id="zhou_et_al_advance_2017">Wengang Zhou&#160;/ Houqiang Li&#160;/ Qi Tian: Recent Advance in Content-Based Image Retrieval. A Literature Survey. arXiv. 19.06.2017. Version 2 from 02.09.2017. PDF. DOI: <ref target="https://doi.org/10.48550/arXiv.1706.06064">10.48550/arXiv.1706.06064</ref>
               </bibl>
               <bibl xml:id="zinnen_et_al_learning_2022a">Mathias Zinnen&#160;/ Prathmesh Madhu&#160;/ Peter Bell&#160;/ Andreas Maier&#160;/ Vincent Christlein (2022a): Transfer Learning for Olfactory Object Detection. In: Ikki Ohmukai&#160;/ Taizo Yamada (eds.): DH 2022. Digital Humanities 2022. Responding to Asian Diversity. Conference Abstracts (Online&#160;/ Tokyo, 25.–29.07.2022). Tokyo 2022, pp.&#160;409–413. PDF. [<ref target="https://dh2022.dhii.asia/dh2022bookofabsts.pdf">online</ref>]</bibl>
               <bibl xml:id="zinnen_et_al_odor_2022b">Mathias Zinnen&#160;/ Prathmesh Madhu&#160;/ Ronak Kosti&#160;/ Peter Bell&#160;/ Andreas Maier&#160;/ Vincent Christlein (2022b): ODOR. The ICPR 2022 Odeuropa Challenge on Olfactory Object Recognition. In: Michael Jenkin&#160;/ Henrik I. Christensen&#160;/ Cheng-Lin Liu (eds.): ICPR 2022. 26<hi rend="super">th</hi> International Conference on Pattern Recognition. Proceedings (Montreal, 21.–25.08.2022). Los Alamitos, US-CA etc. 2022, pp.&#160;4989–4994. PDF. DOI: <ref target="https://doi.org/10.1109/ICPR56361.2022.9956542">10.1109/ICPR56361.2022.9956542</ref></bibl>
               <bibl xml:id="zinnen_et_al_sniffyart_2023">Mathias Zinnen&#160;/ Azhar Hussian&#160;/ Hang Tran&#160;/ Prathmesh Madhu&#160;/ Andreas Maier&#160;/ Vincent Christlein: SniffyArt. The Dataset of Smelling Persons. In: Valerie Gouet-Brunet&#160;/ Ronak Kosti&#160;/ Li Weng (eds.): SUMAC ’23. Proceedings of the 5<hi rend="super">th</hi> Workshop on Analysis, Understanding and Promotion of Heritage Contents (Ottawa, 02.11.2023). New York 2023, pp.&#160;49–58. PDF. DOI: <ref target="https://doi.org/10.1145/3607542.3617357">10.1145/3607542.3617357</ref>
               </bibl>
               <bibl xml:id="zinnen_et_al_gestures_2025">Mathias Zinnen&#160;/ Azhar Hussian&#160;/ Andreas Maier&#160;/ Vincent Christlein: Recognizing Sensory Gestures in Historical Artworks. In: Multimedia Tools and Applications. An International Journal 84 (2025), pp.&#160;39055–39083. PDF. DOI: <ref target="https://doi.org/10.1007/s11042-024-20502-6">10.1007/s11042-024-20502-6</ref>
               </bibl>
               <bibl xml:id="zou_et_al_object_object_2023">Zhengxia Zou&#160;/ Keyan Chen&#160;/ Zhenwei Shi&#160;/ Yuhong Guo&#160;/ Jieping Ye: Object Detection in 20 Years. A Survey. In: Proceedings of the IEEE 111 (2023), No. 3, pp.&#160;257–276. PDF. DOI: <ref target="https://doi.org/10.1109/JPROC.2023.3238524">10.1109/JPROC.2023.3238524</ref></bibl>
               <bibl xml:id="zuschlag_einfuehrung_2022">Christoph Zuschlag: Einführung in die Provenienzforschung. Wie die Herkunft von Kulturgut entschlüsselt wird. Munich 2022. <ptr type="gbv" cRef="1776115457"/>
               </bibl>
            </listBibl>
         </div>
         <div type="bibliography">
            <head>Historical Auctions</head>
            <listBibl>
               <bibl xml:id="ahh_auction_1927a">Auktionshaus Hugo Helbing (1927a). Auction on March 26, 1927, in Munich. German Sales. DOI: <ref target="https://doi.org/10.11588/diglit.48878">10.11588/diglit.48878</ref>
               </bibl>
               <bibl xml:id="ahh_auction_1927b">Auktionshaus Hugo Helbing (1927b). Auction on March 26, 1927, in Munich. Lot 106, Johann Sperl »Sommerlust«. German Sales. DOI: <ref target="https://doi.org/10.11588/diglit.48878#0056">10.11588/diglit.48878#0056</ref>
               </bibl>
               <bibl xml:id="ahh_auction_1927c">Auktionshaus Hugo Helbing (1927c). Auction on March 26, 1927, in Munich. Illustration Lot 106, Johann Sperl »Sommerlust«. German Sales. DOI: <ref target="https://doi.org/10.11588/diglit.48878#0080">10.11588/diglit.48878#0080</ref>
               </bibl>
               <bibl xml:id="ahh_auction_1931">Auktionshaus Hugo Helbing (1931). Auction on April 14, 1931, in Munich. Lot 49, Ludwig von Zumbusch »Römische Ideallandschaft«. German Sales. DOI: <ref target="https://doi.org/10.11588/diglit.49177#0023">10.11588/diglit.49177#0023</ref>
               </bibl>
               <bibl xml:id="neupert_auction_1936">Galerie Neupert (1936). Auction on April 4, 1936, in Zurich. Lot 40, Max Buri »Brienzersee«. German Sales. DOI: <ref target="https://doi.org/10.11588/diglit.8670#0015">10.11588/diglit.8670#0015</ref>
               </bibl>
               <bibl xml:id="bollag_auction_1931">G. &amp; L. Bollag, Zurich. Auction on October 23, 1931, in Zurich. Lot 57, Otto Frölicher »Barken«. German Sales. DOI: <ref target="https://doi.org/10.11588/diglit.6826#0012">10.11588/diglit.6826#0012</ref>
               </bibl>
               <bibl xml:id="ikua_auction_1933">Internationales Kunst- und Auktionshaus (1933). Auction on August 1, 1933, in Berlin. Lot 433, Ludwig von Zumbusch »Landschaft mit Birken und Pappeln«. German Sales. DOI: <ref target="https://doi.org/10.11588/diglit.6194#0025">10.11588/diglit.6194#0025</ref>
               </bibl>
               <bibl xml:id="khlempertz_auction_1928a">Kunsthaus Lempertz (1928a). Auction on April 24, 1928, in Cologne. German Sales. DOI: <ref target="https://doi.org/10.11588/diglit.17868">10.11588/diglit.17868</ref>
               </bibl>
               <bibl xml:id="khlempertz_auction_1928b">Kunsthaus Lempertz (1928b). Auction on April 24, 1928, in Cologne. Lot 32, Johann Sperl »Kinder auf der Wiese«. German Sales. DOI: <ref target="https://doi.org/10.11588/diglit.17868#0021">10.11588/diglit.17868#0021</ref>
               </bibl>
               <bibl xml:id="khlempertz_auction_1928c">Kunsthaus Lempertz (1928c). Auction on April 24, 1928, in Cologne. Illustration Lot 32, Johann Sperl »Kinder auf der Wiese«. German Sales. DOI: <ref target="https://doi.org/10.11588/diglit.17868#0059">10.11588/diglit.17868#0059</ref>
               </bibl>
               <bibl xml:id="kpc_auction_1925">Kunstsalon Paul Cassirer (1925). Auction on March 3 and 4, 1925, in Berlin. German Sales. DOI: <ref target="https://doi.org/10.11588/diglit.23253">10.11588/diglit.23253</ref>
               </bibl>
               <bibl xml:id="kpc_auction_1930a">Kunstsalon Paul Cassirer (1930a). Auction on November 14, 1930, in Berlin. German Sales. DOI: <ref target="https://doi.org/10.11588/diglit.48920">10.11588/diglit.48920</ref>
               </bibl>
               <bibl xml:id="kpc_auction_1930b">Kunstsalon Paul Cassirer (1930b). Auction on November 14, 1930, in Berlin. Lot 88, Johann Sperl »Kind auf der Wiese«. German Sales. DOI: <ref target="https://doi.org/10.11588/diglit.48920#0147">10.11588/diglit.48920#0147</ref>
               </bibl>
               <bibl xml:id="kstoeri_auction_1928a">Kunstsalon Dr. Störi (1928a). Auction on March 30 and 31, 1928, in Zurich. German Sales. DOI: <ref target="https://doi.org/10.11588/diglit.24601">10.11588/diglit.24601</ref>
               </bibl>
               <bibl xml:id="kstoeri_auction_1928b">Kunstsalon Dr. Störi (1928b). Auction on March 30 and 31, 1928, in Zurich. Lot 26, Armand Guillaumin »Der Kran«. German Sales. DOI: <ref target="https://doi.org/10.11588/diglit.24601#0010">10.11588/diglit.24601#0010</ref>
               </bibl>
            </listBibl>
         </div>
      </back>
   </text>
</TEI>
