bioinformatics.bib

@comment{{This file has been generated by bib2bib 1.96}}
@comment{{Command line: /usr/bin/bib2bib -c 'category : "bioinformatics"' -ob bioinformatics.bib hahsler.bib}}
@comment{{CIBCB 2012 conference paper. Authors are written as "Last, First" because the citation key indicates the first author's surname is the two-word "El Dayeh"; in "First Last" form BibTeX would read "El" as a given name.}}
@inproceedings{hahsler:ElDayeh2012,
  author    = {El Dayeh, Maya and Hahsler, Michael},
  title     = {Biological Pathway Completion Using Network Motifs and Random Walks on Graphs},
  booktitle = {IEEE Symposium on Computational
    Intelligence in Bioinformatics and Computational Biology (CIBCB 2012)},
  year      = {2012},
  month     = may,
  pages     = {229--236},
  location  = {San Diego, CA},
  date      = {May 9--12, 2012},
  publisher = {IEEE},
  abstract  = {
    Enhancing our understanding of cellular regulatory processes will
    ultimately lead to the development of better therapeutic
    strategies. Completing incomplete biological pathways through
    utilizing probabilistic protein-protein interaction (PPI) networks
    is one approach towards establishing knowledge of these regulatory
    processes. Previous complex/pathway membership methods focused on
    uncovering candidate protein members from probabilistic
    protein-protein interaction (PPI) networks. In our previous work,
    we defined the pathway completion problem and developed a method that
    uses network motifs to complete incomplete biological pathways.
    Network motifs allow us to take into consideration the intrinsic
    local structures of the pathways to identify the possible points of
    insertion of candidate proteins. However, our previous approach
    requires a complete and correct PPI network. In this paper, we
    extend our previous work and use random walks on a graph to address
    the pathway completion problem with incomplete PPI networks. We
    evaluate our proposed method using three probabilistic PPI networks
    and two KEGG (Kyoto Encyclopedia of Genes and Genomes) pathways.
    Moreover, we compare the accuracy of our network motif approach for
    pathway completion to the existing approach for pathway membership.
    Our experiments show that our new approach achieves similar or
    better accuracy. In addition, our method identifies the possible
    locations and connections of the candidate proteins in the
    incomplete pathway, thus, allowing for targeted experimental
    verification.
    },
  pdf       = {http://michael.hahsler.net/research/BiologicalPathway/CIBCB/Pathway_Random_Walk.pdf},
  category  = {bioinformatics}
}
@comment{{SAC 2012 conference paper. Authors use "Last, First" so the two-word surname "El Dayeh" (per the citation key) is parsed correctly. Only volume is given: the standard styles warn when an inproceedings entry sets both volume and number, and print volume alone. The month is taken from the date field.}}
@inproceedings{hahsler:ElDayeh2011,
  author    = {El Dayeh, Maya and Hahsler, Michael},
  title     = {Analyzing incomplete biological pathways using network motifs},
  booktitle = {27th Symposium On Applied Computing (SAC 2012)},
  year      = {2012},
  month     = mar,
  pages     = {1355--1360},
  location  = {Riva del Garda, Italy},
  date      = {March 26--30, 2012},
  publisher = {ACM},
  volume    = {2},
  abstract  = {
    It is widely accepted that existing knowledge about the structure
    of many biological pathways is incomplete and uncovering
    missing proteins in a biological pathway can help guide targeted
    therapy and drug design and discovery. Current approaches
    address the complex/pathway membership problem by identifying
    potentially missing proteins using probabilistic protein-protein
    interaction (PPI) networks. In this paper we extend the idea of the
    pathway membership problem and define the pathway completion
    problem. In addition to finding possible protein candidates, this
    problem requires predicting the locations and connections of these
    proteins within a given incomplete pathway. We propose the use
    of network motifs to tackle the pathway completion problem. We
    present an algorithm which breaks down an incomplete pathway
    into a set of constituent motifs and then uses the proteins retrieved
    from a probabilistic PPI network to improve the motifs. This new
    approach also has the potential to improve solutions to the
    membership problem by better exploiting the local structures
    represented by network motifs. These new ideas are illustrated
    with a set of preliminary experiments.
    },
  pdf       = {http://michael.hahsler.net/research/BiologicalPathway/Pathway_Motifs_SAC2012.pdf},
  category  = {bioinformatics}
}
@comment{{Journal article in Bioinformatics 26(18). "Markov" is braced as a whole word so that styles which lowercase titles keep its capital letter.}}
@article{hahsler:Kotamarti2010b,
  author   = {Kotamarti, Rao M. and Hahsler, Michael and Raiford, Douglas and McGee, Monnie and Dunham, Margaret H.},
  title    = {Analyzing Taxonomic Classification Using Extensible {Markov} Models},
  journal  = {Bioinformatics},
  volume   = {26},
  number   = {18},
  pages    = {2235--2241},
  year     = {2010},
  doi      = {10.1093/bioinformatics/btq349},
  abstract = {
Motivation: As next generation sequencing is rapidly adding new genomes, their
correct placement in the taxonomy needs verification. However,
the current methods for confirming classification of a taxon or
suggesting revision for a potential misplacement relies on
computationally intense multi-sequence alignment followed by an
iterative adjustment of the distance matrix. Due to
intra-heterogeneity issues with the 16S rRNA marker, no
classifier is available for sub-genus level that could readily
suggest a classification for a novel 16S rRNA sequence.
Metagenomics further complicates the issue by generating
fragmented 16S rRNA sequences. This paper proposes a novel
alignment-free method for representing the microbial profiles
using Extensible Markov Models (EMM) with an extended
Karlin-Altschul statistical framework similar to the classic
alignment paradigm. We propose a Log Odds (LOD) score
classifier based on Gumbel difference distribution that
confirms correct classifications with statistical significance
qualifications and suggests revisions where necessary.
Results: We tested our method by generating a sub-genus level
classifier with which we re-evaluated classifications of 676
microbial organisms using the NCBI FTP database for the 16S
rRNA. The results confirm current classification for all genera
while ascertaining significance at 95\%. Furthermore, this novel
classifier isolates heterogeneity issues to a mere 12
strains while confirming classifications with
significance qualification for the remaining 98\%. The
models require less memory than that needed by
multi-sequence alignments and have better time
complexity than the current methods. The classifier
operates at sub-genus level and thus outperforms the
naive Bayes classifier of the RNA Database Project
where much of the taxonomic analysis is available
online. Finally, using information redundancy in model
building, we show that the method applies to
metagenomic fragment classification of 19 E.coli
strains.
},
  url      = {http://bioinformatics.oxfordjournals.org/content/26/18/2235},
  category = {bioinformatics}
}
@comment{{CIBCB 2010 conference paper. No page numbers or editors are recorded for this entry, so those fields are omitted rather than left empty. "Markov" is braced as a whole word so that styles which lowercase titles keep its capital letter.}}
@inproceedings{hahsler:Kotamarti2010,
  author    = {Kotamarti, Rao M. and Hahsler, Michael and Raiford, Douglas W. and Dunham, Margaret H.},
  title     = {Sequence transformation to a complex signature form for consistent Phylogenetic tree using Extensible {Markov} Model},
  booktitle = {Proceedings of the 2010 IEEE Symposium on Computational Intelligence in Bioinformatics and Computational Biology (IEEE CIBCB 2010)},
  year      = {2010},
  publisher = {IEEE},
  abstract  = {
    Phylogenetic tree analysis using molecular sequences
    continues to expand beyond the 16S rRNA marker. By addressing
    the multi-copy issue known as the intra-heterogeneity,
    this paper restores the focus in using the 16S rRNA marker.
    Through use of a novel learning and model building algorithm,
    the multiple gene copies are integrated into a compact complex
    signature using the Extensible Markov Model (EMM). The
    method clusters related sequence segments while preserving
    their inherent order to create an EMM signature for a microbial
    organism. A library of EMM signatures is generated
    from which samples are drawn for phylogenetic analysis. By
    matching the components of two signatures, referred to as
    quasi-alignment, the differences are highlighted and scored.
    Scoring quasi-alignments is done using adapted Karlin-Altschul
    statistics to compute a novel distance metric. The metric satisfies
    conditions of identity, symmetry, triangular inequality and the
    four point rule required for a valid evolution distance metric.
    The resulting distance matrix is input to PHYLogeny Inference
    Package (PHYLIP) to generate phylogenies using neighbor
    joining algorithms. Through control of clustering in signature
    creation, the diversity of similar organisms and their placement
    in the phylogeny is explained. The experiments include analysis
    of genus Burkholderia, a random microbial sample spanning
    several phyla and a diverse sample that includes RNA of
    Eukaryotic origin. The NCBI sequence data for 16S rRNA is
    used for validation.
  },
  pdf       = {http://michael.hahsler.net/research/EMMSA/EMMSA_CIBCB2010.pdf},
  category  = {bioinformatics}
}
@comment{{COBRA Preprint Series report. The type field replaces the default "Technical Report" label, so styles print "Article 63". The month uses the predefined macro so each style can format it.}}
@techreport{hahsler:Kotamarti2009,
  author      = {Kotamarti, Rao M. and Raiford, Douglas W. and Hahsler, Michael and Wang, Yuhang and McGee, Monnie and Dunham, Margaret H.},
  title       = {Targeted Genomic signature profiling with Quasi-alignment statistics},
  institution = {COBRA Preprint Series},
  type        = {Article},
  number      = {63},
  year        = {2009},
  month       = nov,
  abstract    = {
      Genome databases continue to expand with no change in the basic format of
      sequence data. The prevalent use of the classic alignment based search
      tools like BLAST have significantly pushed the limits of genome isolate
      research. The relatively new frontier of Metagenomic research deals with
      thousands of diverse genomes with newer demands beyond the current
      homologue search and analysis. Compressing sequence data into a complex
      form could facilitate a broader range of sequence analyses. To this end,
      this research explores reorganizing sequence data as complex Markov
      signatures also known as Extensible Markov Models. Markov models have
      found successful application in biological sequence analysis
      applications through small, but important extensions to the original
      theory of Markov Chains. Extensible Markov Model (EMM) offers a novel
      Quasi-alignment complement to the classic alignment based homologous
      sequence search methods like BLAST. EMM based bioinformatic analysis
      (EMMBA) incorporates automatic learning which allows the Markov chain
      creation dynamically. Oligonucleotide or genomic word frequencies form
      the core sequence data in alignment free methods. EMMBA extends the
      Karlin-Altschul statistics to bring forth an analogous E-Score
      statistical significance to the quasi-alignment domain. By consolidating
      a community of sequences into a single searchable profile, EMM
      methodology further reduces the search space for classification. Through
      dynamic generation of the score matrix for each community profile, EMMBA
      fine tunes the score assignments. Each evaluation iteratively adjusts the
      profile score matrix to account for point probabilities of the query to
      ensure Karlin-Altschul assumptions are satisfied to derive meaningful
      statistical significance. The presence of multiple quasi-alignments
      resembles multiple local alignments of BLAST. Quasi-alignments are
      scored based on a difference distribution of Gumbel scores. Species
      signature profiles allow for statistical validation of novel species
      identification. Working in EMM transformation space speeds up
      classification and generates distance matrix for differentiation. The
      techniques and metrics presented are validated using the microbial 16S
      rRNA sequence data from NCBI.
  },
  url         = {http://biostats.bepress.com/cobra/ps/art63/},
  category    = {bioinformatics}
}

This file was generated by bibtex2html 1.96.