% Conference paper published in the PSD 2024 proceedings (Lecture Notes in Computer Science series).
% The citation key is an opaque hex identifier; it is kept unchanged so existing \cite commands still resolve.
% Event details and the publisher copyright notice live in their own fields (eventtitle / eventdate /
% copyright) instead of `note`, so they are no longer printed verbatim at the end of the reference.
@inproceedings{28699478df034f5ab258b8cf40a93718,
  author     = {Brown, James Thomas and Clayton, Ellen W. and Matheny, Michael and Kantarcioglu, Murat and Vorobeychik, Yevgeniy and Malin, Bradley A.},
  title      = {{Robin Hood}: A De-identification Method to Preserve Minority Representation for Disparities Research},
  booktitle  = {Privacy in Statistical Databases -- International Conference, {PSD} 2024, Proceedings},
  editor     = {Domingo-Ferrer, Josep and {\"O}nen, Melek},
  series     = {Lecture Notes in Computer Science},
  publisher  = {{Springer Science and Business Media Deutschland GmbH}},
  year       = {2024},
  pages      = {67--83},
  doi        = {10.1007/978-3-031-69651-0_5},
  isbn       = {978-3-031-69650-3},
  language   = {English},
  keywords   = {Anonymization, De-identification, Fairness},
  eventtitle = {International Conference on Privacy in Statistical Databases, {PSD} 2024},
  eventdate  = {2024-09-25/2024-09-27},
  copyright  = {{\textcopyright} The Author(s), under exclusive license to Springer Nature Switzerland AG 2024},
  abstract   = {Data stewards often turn to de-identification to make data available for research while complying with privacy law. A primary challenge to de-identification is balancing the privacy-utility tradeoff, but optimizing the tradeoff with respect to a complete dataset has been shown to create both privacy risk and data utility disparities between subgroups of individuals represented in the dataset. Notably, the minority populations incur the greatest utility loss and privacy risks. Recent studies have shown that utility inequalities can mask disparities and bias algorithms trained on such data. Yet achieving equal privacy and utility is inherently constrained by the fact that each subgroup has a different privacy-utility tradeoff, differences that are exacerbated by the deterministic transformations that standard de-identification models typically employ. To address this problem, we introduce Robin Hood, a de-identification method that leverages non-deterministic transformations to more equally distribute risk and utility in a de-identified dataset. It does so by transforming majority groups{\textquoteright} records in a way that gives minorities privacy. We show how Robin Hood can provide equal privacy protections to all records in a dataset at expectation while supporting more accurate and consistent disparity estimation than standard k-anonymity methods in simulated and real-world Census data.},
}