Privacy-preserving record linkage (PPRL) determines records representing the same entity while guaranteeing the privacy of individuals. A common approach is to encode plaintext data of records into Bloom filters that enable efficient calculation of similarities. A crucial step of PPRL is the classification of Bloom filter pairs as match or non-match based on computed similarities. In the context of record linkage, several weighting schemes and classification methods are available. The majority of weighting methods determine and adapt weights by applying the Fellegi&Sunter model for each attribute. In the PPRL domain, the attributes of a record are encoded in a joint record-level Bloom filter to impede cryptanalysis attacks so that the application of existing attribute-wise weighting approaches is not feasible. We study methods that use attribute-specific weights in record-level encodings and integrate weight adaptation approaches based on individual value frequencies. The experiments on real-world datasets show that frequency-dependent weighting schemes improve the linkage quality as well as the robustness with regard to the threshold selection.
%0 Conference Paper
%1 Rohde2023-oc
%A Rohde, Florens
%A Franke, Martin
%A Christen, Victor
%A Rahm, Erhard
%D 2023
%I Gesellschaft für Informatik e.V.
%K area_responsibleai area_bigdata
%T Value-specific weighting for record-level encodings in privacy-preserving record linkage
%X Privacy-preserving record linkage (PPRL) determines records representing the same entity while guaranteeing the privacy of individuals. A common approach is to encode plaintext data of records into Bloom filters that enable efficient calculation of similarities. A crucial step of PPRL is the classification of Bloom filter pairs as match or non-match based on computed similarities. In the context of record linkage, several weighting schemes and classification methods are available. The majority of weighting methods determine and adapt weights by applying the Fellegi&Sunter model for each attribute. In the PPRL domain, the attributes of a record are encoded in a joint record-level Bloom filter to impede cryptanalysis attacks so that the application of existing attribute-wise weighting approaches is not feasible. We study methods that use attribute-specific weights in record-level encodings and integrate weight adaptation approaches based on individual value frequencies. The experiments on real-world datasets show that frequency-dependent weighting schemes improve the linkage quality as well as the robustness with regard to the threshold selection.
% NOTE(review): the inproceedings entry type requires a booktitle field, which
% this record does not provide -- add it once the venue has been verified.
@inproceedings{Rohde2023-oc,
  abstract  = {Privacy-preserving record linkage (PPRL) determines records representing the same entity while guaranteeing the privacy of individuals. A common approach is to encode plaintext data of records into Bloom filters that enable efficient calculation of similarities. A crucial step of PPRL is the classification of Bloom filter pairs as match or non-match based on computed similarities. In the context of record linkage, several weighting schemes and classification methods are available. The majority of weighting methods determine and adapt weights by applying the Fellegi\&Sunter model for each attribute. In the PPRL domain, the attributes of a record are encoded in a joint record-level Bloom filter to impede cryptanalysis attacks so that the application of existing attribute-wise weighting approaches is not feasible. We study methods that use attribute-specific weights in record-level encodings and integrate weight adaptation approaches based on individual value frequencies. The experiments on real-world datasets show that frequency-dependent weighting schemes improve the linkage quality as well as the robustness with regard to the threshold selection.},
  added-at  = {2024-09-10T10:41:24.000+0200},
  author    = {Rohde, Florens and Franke, Martin and Christen, Victor and Rahm, Erhard},
  biburl    = {https://puma.scadsai.uni-leipzig.de/bibtex/2f39ba97bf64f8bfc1c63cc9d5a6951e2/scadsfct},
  interhash = {c9c5619d07428536d417486714a5f4b7},
  intrahash = {f39ba97bf64f8bfc1c63cc9d5a6951e2},
  keywords  = {area_responsibleai area_bigdata},
  publisher = {Gesellschaft f{\"u}r Informatik e.V.},
  timestamp = {2024-11-22T15:44:55.000+0100},
  title     = {Value-specific weighting for record-level encodings in privacy-preserving record linkage},
  year      = {2023},
}