@comment{{This file has been generated by bib2bib 1.96}}
@comment{{Command line: /usr/bin/bib2bib -c 'category : "bioinformatics"' -ob bioinformatics.bib hahsler.bib}}
@inproceedings{hahsler:ElDayeh2012,
author = {El Dayeh, Maya and Hahsler, Michael},
title = {Biological Pathway Completion Using Network Motifs and Random Walks on Graphs},
booktitle = {IEEE Symposium on Computational
Intelligence in Bioinformatics and Computational Biology (CIBCB 2012)},
year = {2012},
month = may,
pages = {229--236},
location = {San Diego, CA},
date = {May 9--12, 2012},
publisher = {IEEE},
abstract = {
Enhancing our understanding of cellular regulatory processes will
ultimately lead to the development of better therapeutic
strategies. Completing incomplete biological pathways through
utilizing probabilistic protein-protein interaction (PPI) networks
is one approach towards establishing knowledge of these regulatory
processes. Previous complex/pathway membership methods focused on
uncovering candidate protein members from a probabilistic
protein-protein interaction (PPI) networks. In our previous work,
we defined the pathway completion problem and developed a method that
uses network motifs to complete incomplete biological pathways.
Network motifs allow us to take into consideration the intrinsic
local structures of the pathways to identify the possible points of
insertion of candidate proteins. However, our previous approach
requires a complete and correct PPI network. In this paper, we
extend our previous work and use random walks on a graph to address
the pathway completion problem with incomplete PPI networks. We
evaluate our proposed method using three probabilistic PPI networks
and two KEGG (Kyoto Encyclopedia of Genes and Genomes) pathways.
Moreover, we compare the accuracy of our network motif approach for
pathway completion to the existing approach for pathway membership.
Our experiments show that our new approach achieves similar or
better accuracy. In addition, our method identifies the possible
locations and connections of the candidate proteins in the
incomplete pathway, thus, allowing for targeted experimental
verification.
},
pdf = {http://michael.hahsler.net/research/BiologicalPathway/CIBCB/Pathway_Random_Walk.pdf},
category = {bioinformatics}
}
@inproceedings{hahsler:ElDayeh2011,
author = {El Dayeh, Maya and Hahsler, Michael},
title = {Analyzing incomplete biological pathways using network motifs},
booktitle = {27th Symposium On Applied Computing (SAC 2012)},
year = {2012},
month = mar,
pages = {1355--1360},
location = {Riva del Garda, Italy},
date = {March 26--30, 2012},
publisher = {ACM},
volume = {2},
number = {2},
abstract = {
It is widely accepted that existing knowledge about the structure
of many biological pathways is incomplete and uncovering
missing proteins in a biological pathway can help guide targeted
therapy and drug design and discovery. Current approaches
address the complex/pathway membership problem by identifying
potentially missing proteins using probabilistic protein-protein
interaction (PPI) networks. In this paper we extend the idea of the
pathway membership problem and define the pathway completion
problem. In addition to finding possible protein candidates, this
problem requires predicting the locations and connections of these
proteins within a given incomplete pathway. We propose the use
of network motifs to tackle the pathway completion problem. We
present an algorithm which breaks down an incomplete pathway
into a set of constituent motifs and then uses the proteins retrieved
from a probabilistic PPI network to improve the motifs. This new
approach also has the potential to improve solutions to the
membership problem by better exploiting the local structures
represented by network motifs. These new ideas are illustrated
with a set of preliminary experiments.
},
pdf = {http://michael.hahsler.net/research/BiologicalPathway/Pathway_Motifs_SAC2012.pdf},
category = {bioinformatics}
}
@article{hahsler:Kotamarti2010b,
author = {Kotamarti, Rao M. and Hahsler, Michael and Raiford, Douglas and McGee, Monnie and Dunham, Margaret H.},
title = {Analyzing Taxonomic Classification Using Extensible {Markov} Models},
journal = {Bioinformatics},
volume = {26},
number = {18},
doi = {10.1093/bioinformatics/btq349},
year = {2010},
pages = {2235--2241},
abstract = {
Motivation: As next generation sequencing is rapidly adding new genomes, their
correct placement in the taxonomy needs verification. However,
the current methods for confirming classification of a taxon or
suggesting revision for a potential misplacement relies on
computationally intense multi-sequence alignment followed by an
iterative adjustment of the distance matrix. Due to
intra-heterogeneity issues with the 16S rRNA marker, no
classifier is available for sub-genus level that could readily
suggest a classification for a novel 16S rRNA sequence.
Metagenomics further complicates the issue by generating
fragmented 16S rRNA sequences. This paper proposes a novel
alignment-free method for representing the microbial profiles
using Extensible Markov Models (EMM) with an extended
Karlin-Altschul statistical framework similar to the classic
alignment paradigm. We propose a Log Odds (LOD) score
classifier based on Gumbel difference distribution that
confirms correct classifications with statistical significance
qualifications and suggests revisions where necessary.
Results: We tested our method by generating a sub-genus level
classifier with which we re-evaluated classifications of 676
microbial organisms using the NCBI FTP database for the 16S
rRNA. The results confirm current classification for all genera
while ascertaining significance at 95\%. Furthermore, this novel
classifier isolates heterogeneity issues to a mere 12
strains while confirming classifications with
significance qualification for the remaining 98\%. The
models require less memory than that needed by
multi-sequence alignments and have better time
complexity than the current methods. The classifier
operates at sub-genus level and thus outperforms the
naive Bayes classifier of the RNA Database Project
where much of the taxonomic analysis is available
online. Finally, using information redundancy in model
building, we show that the method applies to
metagenomic fragment classification of 19 E.coli
strains.
},
url = {http://bioinformatics.oxfordjournals.org/content/26/18/2235},
category = {bioinformatics}
}
@inproceedings{hahsler:Kotamarti2010,
author = {Kotamarti, Rao M. and Hahsler, Michael and Raiford, Douglas W. and Dunham, Margaret H.},
title = {Sequence transformation to a complex signature form for consistent Phylogenetic tree using Extensible {Markov} Model},
booktitle = {Proceedings of the 2010 IEEE Symposium on Computational Intelligence in Bioinformatics and Computational Biology (IEEE CIBCB 2010)},
year = {2010},
publisher = {IEEE},
abstract = {
Phylogenetic tree analysis using molecular sequences
continues to expand beyond the 16S rRNA marker. By addressing
the multi-copy issue known as the intra-heterogeneity,
this paper restores the focus in using the 16S rRNA marker.
Through use of a novel learning and model building algorithm,
the multiple gene copies are integrated into a compact complex
signature using the Extensible Markov Model (EMM). The
method clusters related sequence segments while preserving
their inherent order to create an EMM signature for a microbial
organism. A library of EMM signatures is generated
from which samples are drawn for phylogenetic analysis. By
matching the components of two signatures, referred to as
quasi-alignment, the differences are highlighted and scored.
Scoring quasi-alignments is done using adapted Karlin-Altschul
statistics to compute a novel distance metric. The metric satisfies
conditions of identity, symmetry, triangular inequality and the
four point rule required for a valid evolution distance metric.
The resulting distance matrix is input to PHYLogeny Inference
Package (PHYLIP) to generate phylogenies using neighbor
joining algorithms. Through control of clustering in signature
creation, the diversity of similar organisms and their placement
in the phylogeny is explained. The experiments include analysis
of genus Burkholderia, a random microbial sample spanning
several phyla and a diverse sample that includes RNA of
Eukaryotic origin. The NCBI sequence data for 16S rRNA is
used for validation.
},
pdf = {http://michael.hahsler.net/research/EMMSA/EMMSA_CIBCB2010.pdf},
category = {bioinformatics}
}
@techreport{hahsler:Kotamarti2009,
author = {Kotamarti, Rao M. and Raiford, Douglas W. and Hahsler, Michael and Wang, Yuhang and McGee, Monnie and Dunham, Margaret H.},
title = {Targeted Genomic signature profiling with Quasi-alignment statistics},
institution = {COBRA Preprint Series},
year = {2009},
type = {Article},
number = {63},
month = nov,
abstract = {
Genome databases continue to expand with no change in the basic format of
sequence data. The prevalent use of the classic alignment based search
tools like BLAST have significantly pushed the limits of genome isolate
research. The relatively new frontier of Metagenomic research deals with
thousands of diverse genomes with newer demands beyond the current
homologue search and analysis. Compressing sequence data into a complex
form could facilitate a broader range of sequence analyses. To this end,
this research explores reorganizing sequence data as complex Markov
signatures also known as Extensible Markov Models. Markov models have
found successful application in biological sequence analysis
applications through small, but important extensions to the original
theory of Markov Chains. Extensible Markov Model (EMM) offers a novel
Quasi-alignment complement to the classic alignment based homologous
sequence search methods like BLAST. EMM based bioinformatic analysis
(EMMBA) incorporates automatic learning which allows the Markov chain
creation dynamically. Oligonucleotide or genomic word frequencies form
the core sequence data in alignment free methods. EMMBA extends the
Karlin-Altschul statistics to bring forth an analogous E-Score
statistical significance to the quasi-alignment domain. By consolidating
a community of sequences into a single searchable profile, EMM
methodology further reduces the search space for classification. Through
dynamic generation of the score matrix for each community profile, EMMBA
fine tunes the score assignments. Each evaluation iteratively adjusts the
profile score matrix to account for point probabilities of the query to
ensure Karlin-Altschul assumptions are satisfied to derive meaningful
statistical significance. The presence of multiple quasi-alignments
resembles multiple local alignments of BLAST. Quasi-alignments are
scored based on a difference distribution of Gumbel scores. Species
signature profiles allow for statistical validation of novel species
identification. Working in EMM transformation space speeds up
classification and generates distance matrix for differentiation. The techniques
and metrics presented are validated using the microbial 16s rRNA sequence data
from NCBI.
},
url = {http://biostats.bepress.com/cobra/ps/art63/},
category = {bioinformatics}
}
This file was generated by bibtex2html 1.96.