author = {Nasr, R. and Kristensen, T. and Baldi, P.},
   title = {Tree and Hashing Data Structures to Speedup Chemical Searches: Analysis and Experiments},
   journal = {Molecular Informatics},
   volume = {30},
   number = {9},
   pages = {791--800},
   note = {Special Issue on Machine Learning Methods in Chemoinformatics/NIPS.},
   abstract = {In many large chemoinformatics database systems, molecules are represented by long binary fingerprint vectors whose components record the presence or absence of particular functional groups or combinatorial features. For a given query molecule, one is interested in retrieving all the molecules in the database with a similarity to the query above a certain threshold. Here we describe a method for speeding up chemical searches in these large databases of small molecules by combining previously developed tree and hashing data structures to prune the search space without any false negatives. More importantly, we provide a mathematical analysis that allows one to predict the level of pruning, and validate the quality of the predictions of the method through simulation experiments.},
   keywords = {},
   year = {2011}