% Conference paper (IMECS 2011) on SVD-based data perturbation combined with feature selection.
@inproceedings{f554d64f3764475a852835fcf30e9a34,
  author    = {Lin, Pengpeng and Zhang, Jun and {St. Omer}, Ingrid and Wang, Huanjing and Wang, Jie},
  title     = {A comparative study on data perturbation with feature selection},
  booktitle = {{IMECS} 2011 - International {MultiConference} of Engineers and Computer Scientists 2011},
  year      = {2011},
  pages     = {454--459},
  isbn      = {9789881821034},
  language  = {English},
  keywords  = {Feature selection, Perturbation, SSVD, SVD, SVM},
  abstract  = {As a major concern in designing various data mining applications, privacy preservation has become a critical component seeking a trade-off between mining utilities and protecting sensitive information. Data perturbation or distortion is a widely used approach for privacy protection. Either by adding noises or matrix decomposition methods, many algorithms were developed based on the simulation of attacker's behaviors. Most of them are complicated and computationally infeasible on dataset with huge attribute space. In addition, the real-world data tend to be inconsistent, redundant and consist of irrelevant part to target information. Executing algorithms on such data is costly and ineffective. Data preprocessing routines attempt to smooth out noise while identifying outliers, and correct inconsistencies in the data. One of the most important data preprocessing techniques is feature selection. In this paper, we intensively studied Singular Value Decomposition (SVD) based data distortion strategy and feature selection techniques, and conducted experiments to explore how feature selection approaches should be used and better serve for privacy preservation purpose. Sparsified Singular Value Decomposition (SSVD) and filter based feature selection are used for data distortion and reducing feature space. We propose a modified version of Exponential Threshold Strategy(ETS) as our threshold function for matrix sparsification. Some metrics are used to measure data distortion level. We also proposed a novel algorithm to compute rank and gave its lower running time bound. The mining utility of distorted data is tested with a well known Classifier, Support Vector Machine (SVM).},
  note      = {Conference date: 16-03-2011 Through 18-03-2011},
}