Skip to content

Commit d81b58e

Browse files
committed
Add code stylometry reference
1 parent 7b13b14 commit d81b58e

1 file changed

Lines changed: 18 additions & 0 deletions

File tree

docs/references/bib.bib

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1636,3 +1636,21 @@ @article{dekker:1971a
16361636
volume = {18},
16371637
year = {1971},
16381638
}
1639+
1640+
% Caliskan-Islam et al., USENIX Security 2015: source code authorship attribution via code stylometry.
@inproceedings{caliskan-islam:2015a,
  abstract  = {Source code authorship attribution is a significant privacy threat to anonymous code contributors. However, it may also enable attribution of successful attacks from code left behind on an infected system, or aid in resolving copyright, copyleft, and plagiarism issues in the programming fields. In this work, we investigate machine learning methods to de-anonymize source code authors of C/C++ using coding style. Our Code Stylometry Feature Set is a novel representation of coding style found in source code that reflects coding style from properties derived from abstract syntax trees. Our random forest and abstract syntax tree-based approach attributes more authors (1,600 and 250) with significantly higher accuracy (94\% and 98\%) on a larger data set (Google Code Jam) than has been previously achieved. Furthermore, these novel features are robust, difficult to obfuscate, and can be used in other programming languages, such as Python. We also find that (i) the code resulting from difficult programming tasks is easier to attribute than easier tasks and (ii) skilled programmers (who can complete the more difficult tasks) are easier to attribute than less skilled programmers.},
  acmid     = {2831160},
  address   = {Berkeley, CA, USA},
  author    = {Caliskan-Islam, Aylin and Harang, Richard and Liu, Andrew and Narayanan, Arvind and Voss, Clare and Yamaguchi, Fabian and Greenstadt, Rachel},
  booktitle = {Proceedings of the 24th USENIX Conference on Security Symposium},
  isbn      = {978-1-931971-23-2},
  keywords  = {code style, stylometry, static analysis, javascript, automation},
  location  = {Washington, D.C.},
  numpages  = {16},
  pages     = {255--270},
  publisher = {USENIX Association},
  series    = {SEC'15},
  title     = {De-anonymizing Programmers via Code Stylometry},
  url       = {http://dl.acm.org/citation.cfm?id=2831143.2831160},
  year      = {2015},
}

0 commit comments

Comments
 (0)