J. Bevendorff, M. Potthast, M. Hagen, and B. Stein. Heuristic Authorship Obfuscation. In Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, pages 1098--1108. Florence, Italy, Association for Computational Linguistics (July 2019).
DOI: 10.18653/v1/P19-1104
Abstract
Authorship verification is the task of determining whether two texts were written by the same author. We deal with the adversary task, called authorship obfuscation: preventing verification by altering a to-be-obfuscated text. Our new obfuscation approach (1) models writing style difference as the Jensen-Shannon distance between the character n-gram distributions of texts, and (2) manipulates an author's subconsciously encoded writing style in a sophisticated manner using heuristic search. To obfuscate, we analyze the huge space of textual variants for a paraphrased version of the to-be-obfuscated text that has a sufficient Jensen-Shannon distance at minimal costs in terms of text quality. We analyze, quantify, and illustrate the rationale of this approach, define paraphrasing operators, derive obfuscation thresholds, and develop an effective obfuscation framework. Our authorship obfuscation approach defeats state-of-the-art verification approaches, including unmasking and compression models, while keeping text changes at a minimum.
%0 Conference Paper
%1 bevendorff-etal-2019-heuristic
%A Bevendorff, Janek
%A Potthast, Martin
%A Hagen, Matthias
%A Stein, Benno
%B Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics
%C Florence, Italy
%D 2019
%E Korhonen, Anna
%E Traum, David
%E Màrquez, Lluís
%I Association for Computational Linguistics
%K imported
%P 1098--1108
%R 10.18653/v1/P19-1104
%T Heuristic Authorship Obfuscation
%U https://aclanthology.org/P19-1104
%X Authorship verification is the task of determining whether two texts were written by the same author. We deal with the adversary task, called authorship obfuscation: preventing verification by altering a to-be-obfuscated text. Our new obfuscation approach (1) models writing style difference as the Jensen-Shannon distance between the character n-gram distributions of texts, and (2) manipulates an author's subconsciously encoded writing style in a sophisticated manner using heuristic search. To obfuscate, we analyze the huge space of textual variants for a paraphrased version of the to-be-obfuscated text that has a sufficient Jensen-Shannon distance at minimal costs in terms of text quality. We analyze, quantify, and illustrate the rationale of this approach, define paraphrasing operators, derive obfuscation thresholds, and develop an effective obfuscation framework. Our authorship obfuscation approach defeats state-of-the-art verification approaches, including unmasking and compression models, while keeping text changes at a minimum.
@inproceedings{bevendorff-etal-2019-heuristic,
  author    = {Bevendorff, Janek and Potthast, Martin and Hagen, Matthias and Stein, Benno},
  title     = {Heuristic Authorship Obfuscation},
  editor    = {Korhonen, Anna and Traum, David and M{\`a}rquez, Llu{\'\i}s},
  booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics},
  pages     = {1098--1108},
  publisher = {Association for Computational Linguistics},
  address   = {Florence, Italy},
  month     = jul,
  year      = {2019},
  doi       = {10.18653/v1/P19-1104},
  url       = {https://aclanthology.org/P19-1104},
  abstract  = {Authorship verification is the task of determining whether two texts were written by the same author. We deal with the adversary task, called authorship obfuscation: preventing verification by altering a to-be-obfuscated text. Our new obfuscation approach (1) models writing style difference as the Jensen-Shannon distance between the character n-gram distributions of texts, and (2) manipulates an author{'}s subconsciously encoded writing style in a sophisticated manner using heuristic search. To obfuscate, we analyze the huge space of textual variants for a paraphrased version of the to-be-obfuscated text that has a sufficient Jensen-Shannon distance at minimal costs in terms of text quality. We analyze, quantify, and illustrate the rationale of this approach, define paraphrasing operators, derive obfuscation thresholds, and develop an effective obfuscation framework. Our authorship obfuscation approach defeats state-of-the-art verification approaches, including unmasking and compression models, while keeping text changes at a minimum.},
  keywords  = {imported},
  biburl    = {https://puma.scadsai.uni-leipzig.de/bibtex/2e28844104ecc92075a5d927c70d5731f/scadsfct},
  interhash = {ada6176a42c2f463b4253ec117a35db7},
  intrahash = {e28844104ecc92075a5d927c70d5731f},
  added-at  = {2024-10-02T10:38:17.000+0200},
  timestamp = {2024-10-02T10:38:17.000+0200},
}