@comment{{This file has been generated by bib2bib 1.94}}
@comment{{Command line: /usr/bin/bib2bib -c '$type = "ARTICLE"' -ob articles.bib hahsler.bib}}
@article{hahsler:Kotamarti2010b,
  author   = {Kotamarti, Rao M. and Hahsler, Michael and Raiford, Douglas and McGee, Monnie and Dunham, Margaret H.},
  title    = {Analyzing Taxonomic Classification Using Extensible {Markov} Models},
  journal  = {Bioinformatics},
  year     = {2010},
  note     = {Advance Access published July 12, 2010.},
  doi      = {10.1093/bioinformatics/btq349},
  url      = {http://bioinformatics.oxfordjournals.org/cgi/content/abstract/btq349v1},
  abstract = {
Motivation: As next generation sequencing is rapidly adding new genomes, their
correct placement in the taxonomy needs verification. However,
the current methods for confirming classification of a taxon or
suggesting revision for a potential misplacement relies on
computationally intense multi-sequence alignment followed by an
iterative adjustment of the distance matrix. Due to
intra-heterogeneity issues with the 16S rRNA marker, no
classifier is available for sub-genus level that could readily
suggest a classification for a novel 16S rRNA sequence.
Metagenomics further complicates the issue by generating
fragmented 16S rRNA sequences. This paper proposes a novel
alignment-free method for representing the microbial profiles
using Extensible Markov Models (EMM) with an extended
Karlin-Altschul statistical framework similar to the classic
alignment paradigm. We propose a Log Odds (LOD) score
classifier based on Gumbel difference distribution that
confirms correct classifications with statistical significance
qualifications and suggests revisions where necessary.
Results: We tested our method by generating a sub-genus level
classifier with which we re-evaluated classifications of 676
microbial organisms using the NCBI FTP database for the 16S
rRNA. The results confirm current classification for all genera
while ascertaining significance at 95\%. Furthermore, this novel
classifier isolates heterogeneity issues to a mere 12
strains while confirming classifications with
significance qualification for the remaining 98\%. The
models require less memory than that needed by
multi-sequence alignments and have better time
complexity than the current methods. The classifier
operates at sub-genus level and thus outperforms the
naive Bayes classifier of the RNA Database Project
where much of the taxonomic analysis is available
online. Finally, using information redundancy in model
building, we show that the method applies to
metagenomic fragment classification of 19 E.coli
strains.
}
}
@article{hahsler:Hahsler2010,
  author   = {Michael Hahsler and Margaret H. Dunham},
  title    = {{rEMM}: Extensible {Markov} Model for Data Stream Clustering in {R}},
  journal  = {Journal of Statistical Software},
  year     = {2010},
  volume   = {35},
  number   = {5},
  pages    = {1--31},
  url      = {http://www.jstatsoft.org/v35/i05/},
  abstract = {
Clustering streams
of continuously arriving data has become an important application of
data mining in recent years and efficient algorithms have been proposed
by several researchers. However, clustering alone neglects the fact
that data in a data stream is not only characterized by the proximity
of data points which is used by clustering, but also by a temporal
component. The Extensible Markov Model (EMM) adds the temporal
component to data stream clustering by superimposing a dynamically
adapting Markov Chain. In this paper we introduce the implementation of
the R extension package rEMM which implements EMM and we discuss some
examples and applications.
}
}
@article{hahsler:Hahsler2007g,
  author   = {Michael Hahsler and Kurt Hornik},
  title    = {{TSP} -- {Infrastructure} for the Traveling Salesperson Problem},
  journal  = {Journal of Statistical Software},
  year     = {2007},
  volume   = {23},
  number   = {2},
  pages    = {1--21},
  month    = dec,
  issn     = {1548-7660},
  url      = {http://www.jstatsoft.org/v23/i02},
  pdf      = {http://michael.hahsler.net/research/TSP_jss2007/v23i02/v23i02.pdf},
  abstract = {
The traveling salesperson (or, salesman) problem (TSP) is a well known and
important combinatorial optimization problem. The goal is to find the
shortest tour that visits each city in a given list exactly once and then
returns to the starting city. Despite this simple problem statement,
solving the TSP is difficult since it belongs to the class of NP-complete
problems. The importance of the TSP arises besides from its theoretical
appeal from the variety of its applications. Typical applications in
operations research include vehicle routing, computer wiring, cutting
wallpaper and job sequencing. The main application in statistics is
combinatorial data analysis, e.g., reordering rows and columns of data
matrices or identifying clusters. In this paper we introduce the
R~package TSP which provides a basic infrastructure for
handling and solving the traveling salesperson problem. The package
features S3 classes for specifying a TSP and its (possibly optimal)
solution as well as several heuristics to find good solutions. In addition,
it provides an interface to Concorde, one of the best exact TSP solvers
currently available.}
}
@article{hahsler:Hahsler2008,
  author   = {Michael Hahsler and Kurt Hornik and Christian Buchta},
  title    = {Getting Things in Order: An Introduction to the {R}
Package seriation},
  journal  = {Journal of Statistical Software},
  year     = {2008},
  volume   = {25},
  number   = {3},
  pages    = {1--34},
  month    = mar,
  issn     = {1548-7660},
  url      = {http://www.jstatsoft.org/v25/i03},
  pdf      = {http://michael.hahsler.net/research/seriation_JSS2008/seriation.pdf},
  abstract = {Seriation, i.e., finding a linear order for a set of objects
given data and a loss or merit function, is a basic problem in data
analysis. Caused by the problem's combinatorial nature, it is hard
to solve for all but very small sets. Nevertheless, both exact
solution methods and heuristics are available. In this paper we
present the package~seriation which provides the infrastructure for
seriation with R. The infrastructure comprises data structures to
represent linear orders as permutation vectors, a wide array of
seriation methods using a consistent interface, a method to calculate
the value of various loss and merit functions, and several
visualization techniques which build on seriation. To illustrate how
easily the package can be applied for a variety of applications, a
comprehensive collection of examples is presented.}
}
@article{hahsler:Hahsler2007c,
  author   = {Michael Hahsler and Kurt Hornik},
  title    = {New Probabilistic Interest Measures for Association Rules},
  journal  = {Intelligent Data Analysis},
  year     = {2007},
  volume   = {11},
  number   = {5},
  pages    = {437--455},
  issn     = {1088-467X},
  url      = {http://iospress.metapress.com/openurl.asp?genre=article&issn=1088-467X&volume=11&issue=5&spage=437},
  pdf      = {http://michael.hahsler.net/research/hyperConfidence_IDA2007/hyperConfidence.pdf},
  abstract = {Mining association rules is an important technique for discovering
meaningful patterns in transaction databases. Many different measures
of interestingness have been proposed for association rules. However,
these measures fail to take the probabilistic properties of the mined
data into account. In this paper, we start with presenting a simple
probabilistic framework for transaction data which can be used to
simulate transaction data when no associations are present. We use
such data and a real-world database from a grocery outlet to explore
the behavior of confidence and lift, two popular interest measures
used for rule mining. The results show that confidence is systematically
influenced by the frequency of the items in the left hand side of
rules and that lift performs poorly to filter random noise in transaction
data. Based on the probabilistic framework we develop two new interest
measures, hyper-lift and hyper-confidence, which can be used to filter
or order mined association rules. The new measures show significantly
better performance than lift for applications where spurious rules
are problematic. }
}
@article{hahsler:Hahsler2007d,
  author   = {Michael Hahsler and Christian Buchta and Kurt Hornik},
  title    = {Selective Association Rule Generation},
  journal  = {Computational Statistics},
  year     = {2008},
  volume   = {23},
  number   = {2},
  pages    = {303--315},
  month    = apr,
  doi      = {10.1007/s00180-007-0062-z},
  issn     = {0943-4062},
  url      = {http://dx.doi.org/10.1007/s00180-007-0062-z},
  pdf      = {http://michael.hahsler.net/research/ruleGeneration_cost2007/ruleInduction_CompStat.pdf},
  abstract = {Mining association rules is a popular and well researched
method for discovering interesting relations between variables in
large databases. A practical problem is that at medium to low support
values often a large number of frequent itemsets and an even larger
number of association rules are found in a database. A widely used
approach is to gradually increase minimum support and minimum
confidence or to filter the found rules using increasingly strict
constraints on additional measures of interestingness until the set of
rules found is reduced to a manageable size. In this paper we describe
a different approach which is based on the idea to first define a set
of ``interesting'' itemsets (e.g., by a mixture of mining and expert
knowledge) and then, in a second step to selectively generate rules
for only these itemsets. The main advantage of this approach over
increasing thresholds or filtering rules is that the number of rules
found is significantly reduced while at the same time it is not
necessary to increase the support and confidence thresholds which
might lead to missing important information in the database.
}
}
@article{hahsler:Reutterer2007,
  author   = {Thomas Reutterer and Michael Hahsler and Kurt Hornik},
  title    = {{Data Mining und Marketing am Beispiel der explorativen Warenkorbanalyse}},
  journal  = {{Marketing ZFP}},
  year     = {2007},
  volume   = {29},
  number   = {3},
  pages    = {165--181},
  url      = {http://vahlen.becksche.de/zeitschriften/},
  abstract = {Techniken des Data Mining stellen f\"ur die Marketingforschung
und {}-praxis eine zunehmend bedeutsamere Bereicherung des
herk\"ommlichen Methodenarsenals dar. Mit dem Einsatz solcher
prim\"ar datengetriebener Analysewerkzeuge wird das Ziel verfolgt,
marketingrelevante Informationen ``intelligent'' aus
gro{\ss}en Datenbanken (sog. Data Warehouses) zu extrahieren und
f\"ur die weitere Entscheidungsvorbereitung in geeigneter Form
aufzubereiten. Im vorliegenden Beitrag werden Ber\"uhrungspunkte
zwischen Data Mining und Marketing diskutiert und der konkrete
Einsatz ausgew\"ahlter Data{}-Mining{}-Methoden am Beispiel der
explorativen Warenkorb{}- bzw. Sortimentsverbundanalyse f\"ur einen
Transaktionsdatensatz aus dem Lebensmitteleinzelhandel demonstriert.
Zur Anwendung gelangen dabei Techniken aus dem Bereich der
klassischen Affinit\"atsanalyse, ein \textit{K}{}-Medoid{}-Verfahren
der Clusteranalyse sowie Werkzeuge zur Generierung und
anschlie{\ss}enden Beurteilung von Assoziationsregeln zwischen im
Sortiment enthaltenen Warengruppen. Die Vorgehensweise wird dabei
anhand des mit der Statistik{}-Software R frei verf\"ugbaren
Erweiterungspakets \textbf{arules} illustriert.
}
}
@article{hahsler:Hahsler2006e,
  author   = {Christoph Breidert and Michael Hahsler and Thomas Reutterer},
  title    = {A Review of Methods for Measuring Willingness-to-Pay},
  journal  = {Innovative Marketing},
  year     = {2006},
  volume   = {2},
  number   = {4},
  pages    = {8--32},
  issn     = {1814-2427},
  url      = {http://www.businessperspectives.org/en/journalim},
  pdf      = {http://michael.hahsler.net/research/wtp_innovative_marketing2006/wtp_breidert_hahsler_reutterer_preprint.pdf},
  abstract = {Knowledge about a product's willingness-to-pay on behalf of its (potential)
customers plays a crucial role in many areas of marketing management
like pricing decisions or new product development. Numerous approaches
to measure willingness-to-pay with differential conceptual foundations
and methodological implications have been presented in the relevant
literature so far. This article provides the reader with a systematic
overview of the relevant literature on these competing approaches
and associated schools of thought, recognizes their respective merits
and discusses obstacles and issues regarding their adoption to measuring
willingness-to-pay. Because of its practical relevance, special focus
will be put on indirect surveying techniques and, in particular,
conjoint-based applications will be discussed in more detail. The
strengths and limitations of the individual approaches are discussed
and evaluated from a managerial point of view.}
}
@article{hahsler:Hahsler2006a,
  author   = {Michael Hahsler},
  title    = {A Model-Based Frequency Constraint for Mining Associations from Transaction
Data},
  journal  = {Data Mining and Knowledge Discovery},
  year     = {2006},
  volume   = {13},
  number   = {2},
  pages    = {137--166},
  month    = sep,
  doi      = {10.1007/s10618-005-0026-2},
  issn     = {1384-5810},
  url      = {http://dx.doi.org/10.1007/s10618-005-0026-2},
  pdf      = {http://michael.hahsler.net/research/nbd_dami2005/nbd_associationrules_dami2005.pdf},
  abstract = {Mining frequent itemsets is a popular method for finding associated
items in databases. For this method, support, the co-occurrence frequency
of the items which form an association, is used as the primary indicator
of the associations's significance. A single user-specified support
threshold is used to decided if associations should be further investigated.
Support has some known problems with rare items, favors shorter itemsets
and sometimes produces misleading associations. In this paper we
develop a novel model-based frequency constraint as an alternative
to a single, user-specified minimum support. The constraint utilizes
knowledge of the process generating transaction data by applying
a simple stochastic mixture model (the NB model) which allows for
transaction data's typically highly skewed item frequency distribution.
A user-specified precision threshold is used together with the model
to find local frequency thresholds for groups of itemsets. Based
on the constraint we develop the notion of NB-frequent itemsets and
adapt a mining algorithm to find all NB-frequent itemsets in a database.
In experiments with publicly available transaction databases we show
that the new constraint provides improvements over a single minimum
support threshold and that the precision threshold is more robust
and easier to set and interpret by the user. }
}
@article{hahsler:Hahsler2005f,
  author   = {Michael Hahsler and Bettina Gr{\"u}n and Kurt Hornik},
  title    = {arules -- {A} Computational Environment for Mining Association Rules
and Frequent Item Sets},
  journal  = {Journal of Statistical Software},
  year     = {2005},
  volume   = {14},
  number   = {15},
  pages    = {1--25},
  month    = oct,
  issn     = {1548-7660},
  url      = {http://www.jstatsoft.org/v14/i15},
  pdf      = {http://michael.hahsler.net/research/arules_jss2005/v14i15.pdf},
  abstract = {Mining frequent itemsets and association rules is a popular and well
researched approach for discovering interesting relationships between
variables in large databases. The R package arules presented in this
paper provides a basic infrastructure for creating and manipulating
input data sets and for analyzing the resulting itemsets and rules.
The package also includes interfaces to two fast mining algorithms,
the popular C implementations of Apriori and Eclat by Christian Borgelt.
These algorithms can be used to mine frequent itemsets, maximal frequent
itemsets, closed frequent itemsets and association rules.}
}
@article{hahsler:Hahsler2003b,
  author   = {Michael Hahsler},
  title    = {Integrating Digital Document Acquisition into a University Library:
A Case Study of Social and Organizational Challenges},
  journal  = {Journal of Digital Information Management},
  year     = {2003},
  volume   = {1},
  number   = {4},
  pages    = {162--171},
  month    = dec,
  issn     = {0972-7272},
  url      = {http://www.dirf.org/jdim/v1i4.htm},
  pdf      = {http://michael.hahsler.net/research/ePub_jdim2003/IntegratingDDAcquisition_final.pdf},
  abstract = {In this article we report on the effort of the university library
of the Vienna University of Economics and Business Administration
to integrate a digital library component for research documents authored
at the university into the existing library infrastructure. Setting
up a digital library has become a relatively easy task using the
current data base technology and the components and tools freely
available. However, to integrate such a digital library into existing
library systems and to adapt existing document acquisition work-flows
in the organization are non-trivial tasks. We use a research frame
work to identify the key players in this change process and to analyze
their incentive structures. Then we describe the light-weight integration
approach employed by our university and show how it provides incentives
to the key players and at the same time requires only minimal adaptation
of the organization in terms of changing existing work-flows. Our
experience suggests that this light-weight integration offers a cost
efficient and low risk intermediate step towards switching to exclusive
digital document acquisition.}
}
@article{hahsler:GeyerSchulz2002a,
  author   = {Wolfgang Gaul and Andreas Geyer-Schulz and Michael Hahsler and Lars
Schmidt-Thieme},
  title    = {{eMarketing mittels Recommendersystemen}},
  journal  = {{Marketing ZFP}},
  year     = {2002},
  volume   = {24},
  pages    = {47--55},
  series   = {Spezialausgabe ``E-Marketing''},
  url      = {http://vahlen.becksche.de/zeitschriften/},
  abstract = {Recommendersysteme liefern einen wichtigen Beitrag f{\"u}r die Ausgestaltung
von eMarketing Aktivit{\"a}ten. Ausgehend von einer Diskussion von
Input/Output Charakteristika zur Beschreibung solcher Systeme, die
bereits eine geeignete Unterscheidung praxisrelevanter Erscheinungsformen
erlauben, wird motiviert, warum eine solche Charakterisierung durch
die Einbeziehung methodischer Aspekte aus der Marketing Forschung
angereichert werden muss. Ein auf der Theorie des Wiederkaufverhaltens
basierendes Recommendersystem sowie ein System, das Empfehlungen
mittels Analyse des Navigationsverhaltens von Site Besuchern erzeugt,
werden vorgestellt. Am Beispiel der Amazon Site werden die Marketing
M{\"o}glichkeiten von Recommendersystemen verdeutlicht. Abschlie{\ss}end
wird zur Abrundung auf weitere Literatur mit Recommendersystem Bezug
eingegangen. In einem Ausblick werden Hinweise gegeben, in welche
Richtungen Weiterentwicklungen geplant sind.}
}
@article{hahsler:GeyerSchulz2001b,
  author   = {Andreas Geyer-Schulz and Michael Hahsler and Maximillian Jahn},
  title    = {Educational and Scientific Recommender Systems: Designing the Information
Channels of the Virtual University},
  journal  = {International Journal of Engineering Education},
  year     = {2001},
  volume   = {17},
  number   = {2},
  pages    = {153--163},
  series   = {Special Issue on Virtual Universities},
  issn     = {0949-149X},
  url      = {http://www.ijee.dit.ie/contents/c170201.html},
  pdf      = {http://michael.hahsler.net/research/recomm_ijee2001/paper.pdf},
  abstract = {In this article we investigate the role of recommender systems and
their potential in the educational and scientific environment of
a Virtual University. The key idea is to use the information aggregation
capabilities of a recommender system to improve the tutoring and
consulting services of a Virtual University in an automated way and
thus scale tutoring and consulting in a personalized way to a mass
audience. We describe the recommender services of myVU, the collection
of the personalized services of the Virtual University (VU) of the
Vienna University of Economics and Business Administration which
are based on observed user behavior and self assignment of experience
which are currently field-tested. We show, how the usual mechanism
design problems inherent to recommender systems are addressed in
this prototype.}
}
@article{hahsler:GeyerSchulz1999,
  author   = {Andreas Geyer-Schulz and Michael Hahsler and Georg Schneider},
  title    = {The Virtual University and Its Embedded Agents},
  journal  = {{\"O}{G}{A}{I}{} Journal},
  year     = {1999},
  volume   = {18},
  number   = {1},
  pages    = {14--19},
  issn     = {0254-4326},
  abstract = {In this article we present the current state of usage of (intelligent)
Internet agents in the Virtual University (VU) of the Vienna University
of Economics and BA. We discuss opportunities and challenges for
the development of several classes of agents and their sensor systems.
More specifically, agents of the following classes embedded in the
virtual university system will be presented: (1) robots which support
navigation services and (2) robots which support communication and
collaboration.}
}
@article{hahsler:GeyerSchulz1998,
  author   = {Peter Bruhn and Andreas Geyer-Schulz and Michael Hahsler and Markus
Mottel},
  title    = {Genetic Machine Learning and Intelligent Internet Agents},
  journal  = {{\"O}{G}{A}{I}{} Journal},
  year     = {1998},
  volume   = {17},
  number   = {1},
  pages    = {18--25},
  issn     = {0254-4326},
  abstract = { In this paper we report on the status quo of the current machine
learning research projects at the Department of Applied Computer
Science of the Institute of Information Processing and Information
Economics of the Vienna University of Economics and Business Administration.
The current research activities can be categorized as follows: (1)
Development of a theoretic framework of genetic programming. (2)
Application of genetic programming for managerial and economic decision-making
and for breeding agents' strategies in organizational learning. (3)
Development, adaptation, and integration of (intelligent) Internet
agents for support of the virtual organizations. (4) Development
of an infrastructure for intelligent Internet agents in the ``Living
Lectures - Virtual University'' project. (5) Cost-benefit analysis
of agents, analysis of tactical and strategic consequences of agents
and the analysis of their economic applications. }
}
@comment{{This file was generated by bibtex2html 1.94.}}