articles.bib

@comment{{This file has been generated by bib2bib 1.94}}
@comment{{Command line: /usr/bin/bib2bib -c '$type = "ARTICLE"' -ob articles.bib hahsler.bib}}
@article{hahsler:Kotamarti2010b,
  author = {Kotamarti, Rao M. and Hahsler, Michael and Raiford, Douglas and McGee, Monnie and Dunham, Margaret H.},
  title = {Analyzing Taxonomic Classification Using Extensible {Markov} Models},
  journal = {Bioinformatics},
  doi = {10.1093/bioinformatics/btq349},
  year = {2010},
  abstract = {
Motivation: As next generation sequencing is rapidly adding new genomes, their
correct placement in the taxonomy needs verification. However,
the current methods for confirming classification of a taxon or
suggesting revision for a potential misplacement relies on
computationally intense multi-sequence alignment followed by an
iterative adjustment of the distance matrix. Due to
intra-heterogeneity issues with the 16S rRNA marker, no
classifier is available for sub-genus level that could readily
suggest a classification for a novel 16S rRNA sequence.
Metagenomics further complicates the issue by generating
fragmented 16S rRNA sequences. This paper proposes a novel
alignment-free method for representing the microbial profiles
using Extensible Markov Models (EMM) with an extended
Karlin-Altschul statistical framework similar to the classic
alignment paradigm. We propose a Log Odds (LOD) score
classifier based on Gumbel difference distribution that
confirms correct classifications with statistical significance
qualifications and suggests revisions where necessary.
Results: We tested our method by generating a sub-genus level
classifier with which we re-evaluated classifications of 676
microbial organisms using the NCBI FTP database for the 16S
rRNA. The results confirm current classification for all genera
while ascertaining significance at 95\%. Furthermore, this novel
classifier isolates heterogeneity issues to a mere 12
strains while confirming classifications with
significance qualification for the remaining 98\%. The
models require less memory than that needed by
multi-sequence alignments and have better time
complexity than the current methods. The classifier
operates at sub-genus level and thus outperforms the
naive Bayes classifier of the RNA Database Project
where much of the taxonomic analysis is available
online. Finally, using information redundancy in model
building, we show that the method applies to
metagenomic fragment classification of 19 E.coli
strains.
},
  url = {http://bioinformatics.oxfordjournals.org/cgi/content/abstract/btq349v1},
  note = {Advance Access published July 12, 2010.}
}
@article{hahsler:Hahsler2010,
  author = {Michael Hahsler and Margaret H. Dunham},
  title = {{rEMM}: Extensible {Markov} Model for Data Stream
    Clustering in {R}},
  journal = {Journal of Statistical Software},
  year = {2010},
  volume = {35},
  number = {5},
  pages = {1--31},
  url = {http://www.jstatsoft.org/v35/i05/},
  abstract = {
    Clustering streams
    of continuously arriving data has become an important application of
    data mining in recent years and efficient algorithms have been proposed
    by several researchers. However, clustering alone neglects the fact
    that data in a data stream is not only characterized by the proximity
    of data points which is used by clustering, but also by a temporal
    component. The Extensible Markov Model (EMM) adds the temporal
    component to data stream clustering by superimposing a dynamically
    adapting Markov Chain. In this paper we introduce the implementation of
    the R extension package rEMM which implements EMM and we discuss some
    examples and applications.
    }
}
@article{hahsler:Hahsler2007g,
  author = {Michael Hahsler and Kurt Hornik},
  title = {{TSP} -- {Infrastructure} for the Traveling Salesperson Problem},
  journal = {Journal of Statistical Software},
  year = {2007},
  volume = {23},
  pages = {1--21},
  number = {2},
  month = dec,
  abstract = {
The traveling salesperson (or, salesman) problem (TSP) is a well known and
    important combinatorial optimization problem.  The goal is to find the
    shortest tour that visits each city in a given list exactly once and then
    returns to the starting city.  Despite this simple problem statement,
solving the TSP is difficult since it belongs to the class of NP-complete
    problems.  The importance of the TSP arises besides from its theoretical
    appeal from the variety of its applications.  Typical applications in
    operations research include vehicle routing, computer wiring, cutting
    wallpaper and job sequencing.  The main application in statistics is
    combinatorial data analysis, e.g., reordering rows and columns of data
    matrices or identifying clusters.  In this paper we introduce the
    R~package TSP which provides a basic infrastructure for
    handling and solving the traveling salesperson problem.  The package
    features S3 classes for specifying a TSP and its (possibly optimal)
    solution as well as several heuristics to find good solutions. In addition,
it provides an interface to Concorde, one of the best exact TSP solvers
    currently available.},
  issn = {1548-7660},
  url = {http://www.jstatsoft.org/v23/i02},
  pdf = {http://michael.hahsler.net/research/TSP_jss2007/v23i02/v23i02.pdf}
}
@article{hahsler:Hahsler2008,
  author = {Michael Hahsler and Kurt Hornik and Christian Buchta},
  title = {Getting Things in Order: An Introduction to the {R}
      Package seriation},
  journal = {Journal of Statistical Software},
  year = {2008},
  volume = {25},
  pages = {1--34},
  number = {3},
  month = mar,
  abstract = {Seriation, i.e., finding a linear order for a set of objects
      given data and a loss or merit function, is a basic problem in data
          analysis.  Caused by the problem's combinatorial nature, it is hard
          to solve for all but very small sets.  Nevertheless, both exact
          solution methods and heuristics are available.  In this paper we
          present the package~seriation which provides the infrastructure for
          seriation with R.  The infrastructure comprises data structures to
          represent linear orders as permutation vectors, a wide array of
          seriation methods using a consistent interface, a method to calculate
          the value of various loss and merit functions, and several
          visualization techniques which build on seriation. To illustrate how
          easily the package can be applied for a variety of applications, a
          comprehensive collection of examples is presented.},
  issn = {1548-7660},
  url = {http://www.jstatsoft.org/v25/i03},
  pdf = {http://michael.hahsler.net/research/seriation_JSS2008/seriation.pdf}
}
@article{hahsler:Hahsler2007c,
  author   = {Hahsler, Michael and Hornik, Kurt},
  title    = {New Probabilistic Interest Measures for Association Rules},
  journal  = {Intelligent Data Analysis},
  volume   = {11},
  number   = {5},
  pages    = {437--455},
  year     = {2007},
  issn     = {1088-467X},
  url      = {http://iospress.metapress.com/openurl.asp?genre=article&issn=1088-467X&volume=11&issue=5&spage=437},
  pdf      = {http://michael.hahsler.net/research/hyperConfidence_IDA2007/hyperConfidence.pdf},
  abstract = {Mining association rules is an important technique for discovering
    meaningful patterns in transaction databases. Many different measures
    of interestingness have been proposed for association rules. However,
    these measures fail to take the probabilistic properties of the mined
    data into account. In this paper, we start with presenting a simple
    probabilistic framework for transaction data which can be used to
    simulate transaction data when no associations are present. We use
    such data and a real-world database from a grocery outlet to explore
    the behavior of confidence and lift, two popular interest measures
    used for rule mining. The results show that confidence is systematically
    influenced by the frequency of the items in the left hand side of
    rules and that lift performs poorly to filter random noise in transaction
    data. Based on the probabilistic framework we develop two new interest
    measures, hyper-lift and hyper-confidence, which can be used to filter
    or order mined association rules. The new measures show significantly
    better performance than lift for applications where spurious rules
    are problematic. }
}
@article{hahsler:Hahsler2007d,
  author = {Michael Hahsler and Christian Buchta and Kurt Hornik},
  title = {Selective Association Rule Generation},
  journal = {Computational Statistics},
  year = {2008},
  volume = {23},
  pages = {303--315},
  number = {2},
  month = apr,
  doi = {10.1007/s00180-007-0062-z},
  url = {http://dx.doi.org/10.1007/s00180-007-0062-z},
  abstract = {Mining association rules is a popular and well researched
    method for discovering interesting relations between variables in
    large databases. A practical problem is that at medium to low support
    values often a large number of frequent itemsets and an even larger
    number of association rules are found in a database.  A widely used
    approach is to gradually increase minimum support and minimum
    confidence or to filter the found rules using increasingly strict
    constraints on additional measures of interestingness until the set of
    rules found is reduced to a manageable size.  In this paper we describe
    a different approach which is based on the idea to first define a set
    of ``interesting'' itemsets (e.g., by a mixture of mining and expert
    knowledge) and then, in a second step to selectively generate rules
    for only these itemsets.  The main advantage of this approach over
    increasing thresholds or filtering rules is that the number of rules
    found is significantly reduced while at the same time it is not
    necessary to increase the support and confidence thresholds which
    might lead to missing important information in the database.
  },
  issn = {0943-4062},
  pdf = {http://michael.hahsler.net/research/ruleGeneration_cost2007/ruleInduction_CompStat.pdf}
}
@article{hahsler:Reutterer2007,
  author = {Thomas Reutterer and Michael Hahsler and Kurt Hornik},
  title = {{Data Mining und Marketing am Beispiel der explorativen Warenkorbanalyse}},
  journal = {{Marketing ZFP}},
  year = {2007},
  volume = {29},
  number = {3},
  pages = {165--181},
  abstract = {Techniken des Data Mining stellen f\"ur die Marketingforschung
      und {}-praxis eine zunehmend bedeutsamere Bereicherung des
          herk\"ommlichen Methodenarsenals dar. Mit dem Einsatz solcher
          prim\"ar datengetriebener Analysewerkzeuge wird das Ziel verfolgt,
      marketingrelevante Informationen ``intelligent'' aus
          gro{\ss}en Datenbanken (sog. Data Warehouses) zu extrahieren und
          f\"ur die weitere Entscheidungsvorbereitung in geeigneter Form
          aufzubereiten. Im vorliegenden Beitrag werden Ber\"uhrungspunkte
          zwischen Data Mining und Marketing diskutiert und der konkrete
          Einsatz ausgew\"ahlter Data{}-Mining{}-Methoden am Beispiel der
          explorativen Warenkorb{}- bzw.  Sortimentsverbundanalyse f\"ur einen
          Transaktionsdatensatz aus dem Lebensmitteleinzelhandel demonstriert.
          Zur Anwendung gelangen dabei Techniken aus dem Bereich der
          klassischen Affinit\"atsanalyse, ein \textit{K}{}-Medoid{}-Verfahren
          der Clusteranalyse sowie Werkzeuge zur Generierung und
          anschlie{\ss}enden Beurteilung von Assoziationsregeln zwischen im
          Sortiment enthaltenen Warengruppen. Die Vorgehensweise wird dabei
          anhand des mit der Statistik{}-Software R frei verf\"ugbaren
          Erweiterungspakets \textbf{arules} illustriert.
  },
  url = {http://vahlen.becksche.de/zeitschriften/}
}
@article{hahsler:Hahsler2006e,
  author   = {Breidert, Christoph and Hahsler, Michael and Reutterer, Thomas},
  title    = {A Review of Methods for Measuring Willingness-to-Pay},
  journal  = {Innovative Marketing},
  year     = {2006},
  volume   = {2},
  number   = {4},
  pages    = {8--32},
  issn     = {1814-2427},
  url      = {http://www.businessperspectives.org/en/journalim},
  pdf      = {http://michael.hahsler.net/research/wtp_innovative_marketing2006/wtp_breidert_hahsler_reutterer_preprint.pdf},
  abstract = {Knowledge about a product's willingness-to-pay on behalf of its (potential)
    customers plays a crucial role in many areas of marketing management
    like pricing decisions or new product development. Numerous approaches
    to measure willingness-to-pay with differential conceptual foundations
    and methodological implications have been presented in the relevant
    literature so far. This article provides the reader with a systematic
    overview of the relevant literature on these competing approaches
    and associated schools of thought, recognizes their respective merits
    and discusses obstacles and issues regarding their adoption to measuring
    willingness-to-pay. Because of its practical relevance, special focus
    will be put on indirect surveying techniques and, in particular,
    conjoint-based applications will be discussed in more detail. The
    strengths and limitations of the individual approaches are discussed
    and evaluated from a managerial point of view.}
}
@article{hahsler:Hahsler2006a,
  author = {Michael Hahsler},
  title = {A Model-Based Frequency Constraint for Mining Associations from Transaction
    Data},
  journal = {Data Mining and Knowledge Discovery},
  year = {2006},
  volume = {13},
  pages = {137--166},
  number = {2},
  month = sep,
  abstract = {Mining frequent itemsets is a popular method for finding associated
    items in databases. For this method, support, the co-occurrence frequency
    of the items which form an association, is used as the primary indicator
    of the associations's significance. A single user-specified support
    threshold is used to decided if associations should be further investigated.
    Support has some known problems with rare items, favors shorter itemsets
    and sometimes produces misleading associations. In this paper we
    develop a novel model-based frequency constraint as an alternative
    to a single, user-specified minimum support. The constraint utilizes
    knowledge of the process generating transaction data by applying
    a simple stochastic mixture model (the NB model) which allows for
    transaction data's typically highly skewed item frequency distribution.
    A user-specified precision threshold is used together with the model
    to find local frequency thresholds for groups of itemsets. Based
    on the constraint we develop the notion of NB-frequent itemsets and
    adapt a mining algorithm to find all NB-frequent itemsets in a database.
    In experiments with publicly available transaction databases we show
    that the new constraint provides improvements over a single minimum
    support threshold and that the precision threshold is more robust
    and easier to set and interpret by the user. },
  doi = {10.1007/s10618-005-0026-2},
  issn = {1384-5810},
  pdf = {http://michael.hahsler.net/research/nbd_dami2005/nbd_associationrules_dami2005.pdf},
  url = {http://dx.doi.org/10.1007/s10618-005-0026-2}
}
@article{hahsler:Hahsler2005f,
  author = {Michael Hahsler and Bettina Gr{\"u}n and Kurt Hornik},
  title = {{arules} -- {A} Computational Environment for Mining Association Rules
    and Frequent Item Sets},
  journal = {Journal of Statistical Software},
  year = {2005},
  volume = {14},
  pages = {1--25},
  number = {15},
  month = oct,
  abstract = {Mining frequent itemsets and association rules is a popular and well
    researched approach for discovering interesting relationships between
    variables in large databases. The R package arules presented in this
    paper provides a basic infrastructure for creating and manipulating
    input data sets and for analyzing the resulting itemsets and rules.
    The package also includes interfaces to two fast mining algorithms,
    the popular C implementations of Apriori and Eclat by Christian Borgelt.
    These algorithms can be used to mine frequent itemsets, maximal frequent
    itemsets, closed frequent itemsets and association rules.},
  issn = {1548-7660},
  pdf = {http://michael.hahsler.net/research/arules_jss2005/v14i15.pdf},
  url = {http://www.jstatsoft.org/v14/i15}
}
@article{hahsler:Hahsler2003b,
  author   = {Hahsler, Michael},
  title    = {Integrating Digital Document Acquisition into a University Library:
    A Case Study of Social and Organizational Challenges},
  journal  = {Journal of Digital Information Management},
  volume   = {1},
  number   = {4},
  pages    = {162--171},
  month    = dec,
  year     = {2003},
  issn     = {0972-7272},
  url      = {http://www.dirf.org/jdim/v1i4.htm},
  pdf      = {http://michael.hahsler.net/research/ePub_jdim2003/IntegratingDDAcquisition_final.pdf},
  abstract = {In this article we report on the effort of the university library
    of the Vienna University of Economics and Business Administration
    to integrate a digital library component for research documents authored
    at the university into the existing library infrastructure. Setting
    up a digital library has become a relatively easy task using the
    current data base technology and the components and tools freely
    available. However, to integrate such a digital library into existing
    library systems and to adapt existing document acquisition work-flows
    in the organization are non-trivial tasks. We use a research frame
    work to identify the key players in this change process and to analyze
    their incentive structures. Then we describe the light-weight integration
    approach employed by our university and show how it provides incentives
    to the key players and at the same time requires only minimal adaptation
    of the organization in terms of changing existing work-flows. Our
    experience suggests that this light-weight integration offers a cost
    efficient and low risk intermediate step towards switching to exclusive
    digital document acquisition.}
}
@article{hahsler:GeyerSchulz2002a,
  author = {Wolfgang Gaul and Andreas Geyer-Schulz and Michael Hahsler and Lars
    Schmidt-Thieme},
  title = {{eMarketing mittels Recommendersystemen}},
  journal = {{Marketing ZFP}},
  year = {2002},
  volume = {24},
  pages = {47--55},
  abstract = {Recommendersysteme liefern einen wichtigen Beitrag f{\"u}r die Ausgestaltung
    von eMarketing Aktivit{\"a}ten. Ausgehend von einer Diskussion von
    Input/Output Charakteristika zur Beschreibung solcher Systeme, die
    bereits eine geeignete Unterscheidung praxisrelevanter Erscheinungsformen
    erlauben, wird motiviert, warum eine solche Charakterisierung durch
    die Einbeziehung methodischer Aspekte aus der Marketing Forschung
    angereichert werden muss. Ein auf der Theorie des Wiederkaufverhaltens
    basierendes Recommendersystem sowie ein System, das Empfehlungen
    mittels Analyse des Navigationsverhaltens von Site Besuchern erzeugt,
    werden vorgestellt. Am Beispiel der Amazon Site werden die Marketing
    M{\"o}glichkeiten von Recommendersystemen verdeutlicht. Abschlie{\ss}end
    wird zur Abrundung auf weitere Literatur mit Recommendersystem Bezug
    eingegangen. In einem Ausblick werden Hinweise gegeben, in welche
    Richtungen Weiterentwicklungen geplant sind.},
  series = {Spezialausgabe ``E-Marketing''},
  url = {http://vahlen.becksche.de/zeitschriften/}
}
@article{hahsler:GeyerSchulz2001b,
  author   = {Geyer-Schulz, Andreas and Hahsler, Michael and Jahn, Maximillian},
  title    = {Educational and Scientific Recommender Systems: Designing the Information
    Channels of the Virtual University},
  journal  = {International Journal of Engineering Education},
  series   = {Special Issue on Virtual Universities},
  volume   = {17},
  number   = {2},
  pages    = {153--163},
  year     = {2001},
  issn     = {0949-149X},
  url      = {http://www.ijee.dit.ie/contents/c170201.html},
  pdf      = {http://michael.hahsler.net/research/recomm_ijee2001/paper.pdf},
  abstract = {In this article we investigate the role of recommender systems and
    their potential in the educational and scientific environment of
    a Virtual University. The key idea is to use the information aggregation
    capabilities of a recommender system to improve the tutoring and
    consulting services of a Virtual University in an automated way and
    thus scale tutoring and consulting in a personalized way to a mass
    audience. We describe the recommender services of myVU, the collection
    of the personalized services of the Virtual University (VU) of the
    Vienna University of Economics and Business Administration which
    are based on observed user behavior and self assignment of experience
    which are currently field-tested. We show, how the usual mechanism
    design problems inherent to recommender systems are addressed in
    this prototype.}
}
@article{hahsler:GeyerSchulz1999,
  author   = {Geyer-Schulz, Andreas and Hahsler, Michael and Schneider, Georg},
  title    = {The Virtual University and Its Embedded Agents},
  journal  = {{\"O}{G}{A}{I}{} Journal},
  volume   = {18},
  number   = {1},
  pages    = {14--19},
  year     = {1999},
  issn     = {0254-4326},
  abstract = {In this article we present the current state of usage of (intelligent)
    Internet agents in the Virtual University (VU) of the Vienna University
    of Economics and BA. We discuss opportunities and challenges for
    the development of several classes of agents and their sensor systems.
    More specifically, agents of the following classes embedded in the
    virtual university system will be presented: (1) robots which support
    navigation services and (2) robots which support communication and
    collaboration.}
}
@article{hahsler:GeyerSchulz1998,
  author = {Peter Bruhn and Andreas Geyer-Schulz and Michael Hahsler and Markus
    Mottel},
  title = {Genetic Machine Learning and Intelligent Internet Agents},
  journal = {{\"O}{G}{A}{I}{} Journal},
  year = {1998},
  volume = {17},
  pages = {18--25},
  number = {1},
  abstract = { In this paper we report on the status quo of the current machine
    learning research projects at the Department of Applied Computer
    Science of the Institute of Information Processing and Information
    Economics of the Vienna University of Economics and Business Administration.
    The current research activities can be categorized as follows: (1)
    Development of a theoretic framework of genetic programming. (2)
    Application of genetic programming for managerial and economic decision-making
    and for breeding agents' strategies in organizational learning. (3)
    Development, adaptation, and integration of (intelligent) Internet
    agents for support of the virtual organizations. (4) Development
    of an infrastructure for intelligent Internet agents in the ``Living
    Lectures - Virtual University'' project. (5) Cost-benefit analysis
    of agents, analysis of tactical and strategic consequences of agents
    and the analysis of their economic applications. },
  issn = {0254-4326}
}

This file was generated by bibtex2html 1.94.