% SAC 2012 conference paper. The citation key keeps its 2011 suffix so that
% existing citations of this key continue to resolve.
@inproceedings{hahsler:ElDayeh2011,
author = {Maya Eldayeh and Michael Hahsler},
title = {Analyzing incomplete biological pathways using network motifs},
booktitle = {27th Symposium on Applied Computing (SAC 2012)},
year = {2012},
location = {Riva del Garda, Italy},
date = {March 26--30, 2012},
publisher = {ACM},
abstract = {
It is widely accepted that existing knowledge about the structure
of many biological pathways is incomplete and uncovering
missing proteins in a biological pathway can help guide targeted
therapy and drug design and discovery. Current approaches
address the complex/pathway membership problem by identifying
potentially missing proteins using probabilistic protein-protein
interaction (PPI) networks. In this paper we extend the idea of the
pathway membership problem and define the pathway completion
problem. In addition to finding possible protein candidates, this
problem requires predicting the locations and connections of these
proteins within a given incomplete pathway. We propose the use
of network motifs to tackle the pathway completion problem. We
present an algorithm which breaks down an incomplete pathway
into a set of constituent motifs and then uses the proteins retrieved
from a probabilistic PPI network to improve the motifs. This new
approach also has the potential to improve solutions to the
membership problem by better exploiting the local structures
represented by network motifs. These new ideas are illustrated
with a set of preliminary experiments.
},
pdf = {http://michael.hahsler.net/research/BiologicalPathway/Pathway_Motifs_SAC2012.pdf},
category = {bioinformatics}
}
% ICDMW 2011 workshop paper on real-time evaluation of the PIIH hurricane
% intensity model.
@inproceedings{hahsler:Jovanovic2011,
author = {Vladimir Jovanovic and Margaret H. Dunham and Michael Hahsler and Yu Su},
title = {Evaluating Hurricane Intensity Prediction Techniques in Real Time},
booktitle = {Third IEEE ICDM Workshop on Knowledge Discovery from Climate Data, Proceedings of the 2011 IEEE International Conference on Data Mining Workshops (ICDMW 2011)},
year = {2011},
location = {Vancouver, Canada},
date = {December 10, 2011},
publisher = {IEEE},
abstract = {
While the accuracy of hurricane track prediction
has been improving, predicting intensity, the maximum sustained wind
speed, is still a very difficult challenge. This is problematic because
the destructive power of a hurricane is directly related to its
intensity. In this paper, we present Prediction Intensity Interval
model for Hurricanes (PIIH) which combines sophisticated data mining
techniques to create an online real time model for accurate intensity
predictions and we present a web-based framework to dynamically compare
PIIH to operational models used by the National Hurricane Center (NHC).
The created dynamic website tracks, compares, and provides
visualization to facilitate immediate comparisons of prediction
techniques. This paper is a work in progress paper reporting on both,
new features of the PIIH model and online visualization of the accuracy of
that model as compared to other techniques.
},
pdf = {http://michael.hahsler.net/research/Hurricane/ICDMW_11/PIIH_Evaluation.pdf},
category = {stream mining, climate}
}
% JMLR article (2011) describing the R add-on packages built around arules.
% The issue number is not recorded; the empty placeholder is kept as found.
@article{hahsler:Hahsler2011d,
  author   = {Michael Hahsler and Sudheer Chelluboina and Kurt Hornik and Christian Buchta},
  title    = {The arules {R}-Package Ecosystem: Analyzing Interesting Patterns from Large Transaction Datasets},
  journal  = {Journal of Machine Learning Research},
  volume   = {12},
  number   = {},
  pages    = {1977--1981},
  year     = {2011},
  url      = {http://jmlr.csail.mit.edu/papers/v12/hahsler11a.html},
  category = {association rules},
  abstract = {
    This paper describes the ecosystem of R add-on packages developed around
    the infrastructure provided by the package arules. The packages
    provide comprehensive functionality for analyzing interesting patterns
    including frequent itemsets, association rules, frequent sequences and
    for building applications like associative classification. After
    discussing the ecosystem's design we illustrate the ease of mining
    and visualizing rules with a short example.
  }
}
% Interface 2011 symposium paper on hierarchical (grouped) visualization of
% association rules, implemented in the R package arulesViz.
@inproceedings{hahsler:Hahsler2011c,
author = {Michael Hahsler and Sudheer Chelluboina},
title = {Visualizing Association Rules in Hierarchical Groups},
booktitle = {42nd Symposium on the Interface:
Statistical, Machine Learning, and Visualization Algorithms
(Interface 2011)},
year = {2011},
location = {Cary, North Carolina},
date = {June 1--3, 2011},
publisher = {The Interface Foundation of North America},
abstract = {
Association rule mining is one of the most popular data mining methods.
However, mining association rules often results in a very large number
of found rules, leaving the analyst with the task to go through all the
rules and discover interesting ones. Sifting manually through large
sets of rules is time consuming and strenuous. Visualization has a long
history of making large amounts of data better accessible using
techniques like selecting and zooming. However, most association rule
visualization techniques are still falling short when it comes to a
large number of rules. In this paper we present a new interactive
visualization technique which lets the user navigate through a
hierarchy of groups of association rules. We demonstrate how this new
visualization technique can be used to analyze large sets of
association rules with examples from our implementation in the
R-package arulesViz.
},
pdf = {http://michael.hahsler.net/research/Interface2011/arulesViz/arulesViz.pdf},
category = {association rules, visualization}
}
% SDM11 conference paper introducing the TRACDS framework for data streams.
% No editor is recorded; the empty placeholder is kept as found.
@inproceedings{hahsler:Hahsler2011,
  author    = {Michael Hahsler and Margaret H. Dunham},
  title     = {Temporal Structure Learning for Clustering Massive Data Streams in Real-Time},
  booktitle = {{SIAM} Conference on Data Mining ({SDM11})},
  editor    = {},
  publisher = {SIAM},
  pages     = {664--675},
  year      = {2011},
  date      = {April 28--30, 2011},
  location  = {Mesa, Arizona},
  pdf       = {http://michael.hahsler.net/research/TRACDS_SDM11/TRACDS_SDM11.pdf},
  category  = {stream mining},
  abstract  = {
    This paper describes one of the first attempts to model the temporal
    structure of massive data streams in real-time using data stream
    clustering. Recently, many data stream clustering algorithms have
    been developed which efficiently find a partition of the data
    points in a data stream. However, these algorithms disregard the
    information represented by the temporal order of the data points in
    the stream which for many applications is an important part of the
    data stream. In this paper we propose a new framework called
    Temporal Relationships Among Clusters for Data Streams (TRACDS)
    which allows to learn the temporal structure while clustering a
    data stream. We identify, organize and describe the clustering
    operations which are used by state-of-the-art data stream
    clustering algorithms. Then we show that by defining a set of new
    operations to transform Markov Chains with states representing
    clusters dynamically, we can efficiently capture temporal ordering
    information. This framework allows us to preserve temporal
    relationships among clusters for any state-of-the-art data stream
    clustering algorithm with only minimal overhead.
    To investigate the usefulness of TRACDS, we evaluate the improvement of
    TRACDS over pure data stream clustering for anomaly detection using
    several synthetic and real-world data sets. The experiments show that
    TRACDS is able to considerably improve the results even if we introduce
    a high rate of incorrect time stamps which is typical for real-world
    data streams.
  }
}
% SIGKDD Explorations workshop report for StreamKDD'10.
@article{hahsler:Dunham2010b,
  author   = {Margaret H. Dunham and Michael Hahsler and Myra Spiliopoulou},
  title    = {Novel Data Stream Pattern Mining, {Report on the StreamKDD'10 Workshop}},
  journal  = {SIGKDD Explorations},
  volume   = {12},
  number   = {2},
  pages    = {54--55},
  year     = {2010},
  url      = {http://www.sigkdd.org/explorations/issue.php?volume=12&issue=2&year=2010&month=12},
  category = {stream mining},
  abstract = {
    This report summarizes the First International Workshop on
    Novel Data Stream Pattern Mining held at the 16th ACM SIGKDD
    International Conference on Knowledge Discovery and Data
    Mining, on July 25 2010 in Washington, DC.
  }
}
% JCGS article on dissimilarity plots. The 2011 issues of JCGS form volume 20;
% the bare DOI is the one already embedded in the url field.
@article{hahsler:Hahsler2011b,
author = {Michael Hahsler and Kurt Hornik},
title = {Dissimilarity Plots: {A} Visual Exploration Tool for Partitional Clustering},
journal = {Journal of Computational and Graphical Statistics},
year = {2011},
month = jun,
volume = {20},
number = {2},
pages = {335--354},
doi = {10.1198/jcgs.2010.09139},
url = {http://pubs.amstat.org/doi/abs/10.1198/jcgs.2010.09139},
pdf = {http://michael.hahsler.net/research/dissplot_JCGS2011/dissplot_preprint.pdf},
abstract = {
For hierarchical clustering, dendrograms are a convenient
and powerful visualization technique. Although many visualization methods
have been suggested for partitional clustering, their usefulness
deteriorates quickly with increasing dimensionality of the data and/or they
fail to represent structure between and within clusters simultaneously. In
this paper we extend (dissimilarity) matrix shading with several reordering
steps based on seriation techniques. Both ideas, matrix shading and
reordering, have been well-known for a long time. However, only recent
algorithmic improvements allow us to solve or approximately solve the
seriation problem efficiently for larger problems. Furthermore, seriation
techniques are used in a novel stepwise process (within each cluster and
between clusters) which leads to a visualization technique that is
able to present the structure between clusters and the micro-structure
within clusters in one concise plot. This not only allows us to judge
cluster quality but also makes mis-specification of the number of clusters
apparent. We give a detailed discussion of the construction of
dissimilarity plots and demonstrate their usefulness with several examples.
Experiments show that dissimilarity plots scale very well with increasing
data dimensionality.
},
category = {seriation, visualization}
}
% ICDMW 2010 workshop paper introducing the WFL-EMM hurricane intensity model.
% The bare DOI is the one already embedded in the url field.
@inproceedings{hahsler:Yu2010,
author = {Yu Su and Sudheer Chelluboina and Michael Hahsler and Margaret H. Dunham},
title = {A New Data Mining Model for Hurricane Intensity Prediction},
booktitle = {Second IEEE ICDM Workshop on Knowledge Discovery from Climate Data: Prediction, Extremes and Impacts, Proceedings of the 2010 IEEE International Conference on Data Mining Workshops (ICDMW 2010)},
year = {2010},
pages = {98--105},
location = {Sydney, Australia},
date = {December 14, 2010},
publisher = {IEEE},
doi = {10.1109/ICDMW.2010.158},
url = {http://www.computer.org/portal/web/csdl/doi/10.1109/ICDMW.2010.158},
abstract = {
This paper proposes a new hurricane intensity prediction model, WFL-EMM,
which is based on the data mining techniques of feature weight learning
(WFL) and Extensible Markov Model (EMM). The data features used are
those employed by one of the most popular intensity prediction models,
SHIPS. In our algorithm, the weights of the features are learned by a
genetic algorithm (GA) using historical hurricane data. As the GA's
fitness function we use the error of the intensity prediction by an EMM
learned using given feature weights. For fitness calculation we use a
technique similar to $k$-fold cross validation on the training data.
The best weights obtained by the genetic algorithm are used to build an
EMM with all training data. This EMM is then applied to predict the
hurricane intensities and compute prediction errors for the test data.
Using historical data for the named Atlantic tropical cyclones from
1982 to 2003, experiments demonstrate that WFL-EMM provides
significantly more accurate intensity predictions than SHIPS within 72
hours. Since we report here first results, we indicate how to improve
WFL-EMM in the future.
},
pdf = {http://michael.hahsler.net/research/Hurricane/ICDMW_10/05693288.pdf},
category = {stream mining, climate}
}
% Edited proceedings volume of the StreamKDD'10 workshop.
% The 13-digit 978-prefixed identifier is an ISBN, so it is stored in isbn.
% Only one publisher field is allowed per entry; the first value given
% (the one classic BibTeX honours when a field is repeated) is kept.
@book{hahsler:Dunham2010,
editor = {Margaret H. Dunham and Michael Hahsler and Myra Spiliopoulou},
title = {Proceedings of the First International Workshop on Novel Data Stream Pattern Mining Techniques (StreamKDD'10)},
publisher = {ACM Press},
year = 2010,
isbn = {978-1-4503-0226-5},
location = {Washington, D.C.},
address = {New York, NY, USA},
url = {http://portal.acm.org/citation.cfm?id=1833280},
abstract = {Data stream mining gained in importance over the last years
because it is indispensable for many real applications such as
prediction and evolution of weather phenomena; security and anomaly
detection in networks; evaluating satellite data; and mining health
monitoring streams. Stream mining algorithms must take account of
the unique properties of stream data: infinite data, temporal
ordering, concept drifts and shifts, demand for scalability etc.
This workshop brings together scholars working in different areas of
learning on streams, including sensor data and other forms of accumulating
data. Most of the papers in the next pages are on unsupervised learning
with clustering methods. Issues addressed include the detection of outliers
and anomalies, evolutionary clustering and incremental clustering, learning
in subspaces of the complete feature space and learning with exploitation
of context, deriving models from text streams and visualizing them. },
category = {stream mining}
}
% Bioinformatics article (2010) on taxonomic classification of 16S rRNA
% sequences with Extensible Markov Models.
@article{hahsler:Kotamarti2010b,
  author   = {Kotamarti, Rao M. and Hahsler, Michael and Raiford, Douglas and McGee, Monnie and Dunham, Margaret H.},
  title    = {Analyzing Taxonomic Classification Using Extensible {M}arkov Models},
  journal  = {Bioinformatics},
  volume   = {26},
  number   = {18},
  pages    = {2235--2241},
  year     = {2010},
  doi      = {10.1093/bioinformatics/btq349},
  url      = {http://bioinformatics.oxfordjournals.org/content/26/18/2235},
  category = {bioinformatics},
  abstract = {
    Motivation: As next generation sequencing is rapidly adding new genomes, their
    correct placement in the taxonomy needs verification. However,
    the current methods for confirming classification of a taxon or
    suggesting revision for a potential misplacement relies on
    computationally intense multi-sequence alignment followed by an
    iterative adjustment of the distance matrix. Due to
    intra-heterogeneity issues with the 16S rRNA marker, no
    classifier is available for sub-genus level that could readily
    suggest a classification for a novel 16S rRNA sequence.
    Metagenomics further complicates the issue by generating
    fragmented 16S rRNA sequences. This paper proposes a novel
    alignment-free method for representing the microbial profiles
    using Extensible Markov Models (EMM) with an extended
    Karlin-Altschul statistical framework similar to the classic
    alignment paradigm. We propose a Log Odds (LOD) score
    classifier based on Gumbel difference distribution that
    confirms correct classifications with statistical significance
    qualifications and suggests revisions where necessary.
    Results: We tested our method by generating a sub-genus level
    classifier with which we re-evaluated classifications of 676
    microbial organisms using the NCBI FTP database for the 16S
    rRNA. The results confirm current classification for all genera
    while ascertaining significance at 95\%. Furthermore, this novel
    classifier isolates heterogeneity issues to a mere 12
    strains while confirming classifications with
    significance qualification for the remaining 98\%. The
    models require less memory than that needed by
    multi-sequence alignments and have better time
    complexity than the current methods. The classifier
    operates at sub-genus level and thus outperforms the
    naive Bayes classifier of the RNA Database Project
    where much of the taxonomic analysis is available
    online. Finally, using information redundancy in model
    building, we show that the method applies to
    metagenomic fragment classification of 19 E.coli
    strains.
  }
}
% Journal of Statistical Software article introducing the R package rEMM.
% The title uses plain brace protection rather than the JSS-class macros
% \pkg and \proglang, which are undefined in documents that do not load the
% JSS class; this matches the other JSS entries in this file.
@article{hahsler:Hahsler2010,
author = {Michael Hahsler and Margaret H. Dunham},
title = {{rEMM}: Extensible {Markov} Model for Data Stream
Clustering in {R}},
journal = {Journal of Statistical Software},
year = {2010},
volume = {35},
number = {5},
pages = {1--31},
url = {http://www.jstatsoft.org/v35/i05/},
abstract = {
Clustering streams
of continuously arriving data has become an important application of
data mining in recent years and efficient algorithms have been proposed
by several researchers. However, clustering alone neglects the fact
that data in a data stream is not only characterized by the proximity
of data points which is used by clustering, but also by a temporal
component. The Extensible Markov Model (EMM) adds the temporal
component to data stream clustering by superimposing a dynamically
adapting Markov Chain. In this paper we introduce the implementation of
the R extension package rEMM which implements EMM and we discuss some
examples and applications.
},
category = {stream mining}
}
% IEEE CIBCB 2010 paper on EMM signatures for phylogenetic analysis of
% 16S rRNA sequences.
@inproceedings{hahsler:Kotamarti2010,
author = {Rao M. Kotamarti and Michael Hahsler and Douglas W. Raiford and Margaret H. Dunham},
title = {Sequence transformation to a complex signature form for consistent Phylogenetic tree using Extensible {Markov} Model},
booktitle = {Proceedings of the 2010 IEEE Symposium on Computational Intelligence in Bioinformatics and Computational Biology (IEEE CIBCB 2010)},
year = {2010},
publisher = {IEEE},
abstract = {
Phylogenetic tree analysis using molecular sequences
continues to expand beyond the 16S rRNA marker. By addressing
the multi-copy issue known as the intra-heterogeneity,
this paper restores the focus in using the 16S rRNA marker.
Through use of a novel learning and model building algorithm,
the multiple gene copies are integrated into a compact complex
signature using the Extensible Markov Model (EMM). The
method clusters related sequence segments while preserving
their inherent order to create an EMM signature for a microbial
organism. A library of EMM signatures is generated
from which samples are drawn for phylogenetic analysis. By
matching the components of two signatures, referred to as
quasi-alignment, the differences are highlighted and scored.
Scoring quasi-alignments is done using adapted Karlin-Altschul
statistics to compute a novel distance metric. The metric satisfies
conditions of identity, symmetry, triangular inequality and the
four point rule required for a valid evolution distance metric.
The resulting distance matrix is input to PHYLogeny Inference
Package (PHYLIP) to generate phylogenies using neighbor
joining algorithms. Through control of clustering in signature
creation, the diversity of similar organisms and their placement
in the phylogeny is explained. The experiments include analysis
of genus Burkholderia, a random microbial sample spanning
several phyla and a diverse sample that includes RNA of
Eukaryotic origin. The NCBI sequence data for 16S rRNA is
used for validation.
},
pdf = {http://michael.hahsler.net/research/EMMSA/EMMSA_CIBCB2010.pdf},
category = {bioinformatics}
}
% COBRA preprint no. 63 (November 2009) on EMM-based quasi-alignment
% statistics for genomic signature profiling.
@techreport{hahsler:Kotamarti2009,
author = {Rao M. Kotamarti and Douglas W. Raiford and Michael Hahsler and Yuhang Wang and Monnie McGee and Margaret H. Dunham},
title = {Targeted Genomic signature profiling with Quasi-alignment statistics},
institution = {COBRA Preprint Series},
year = {2009},
type = {Article},
number = {63},
month = nov,
abstract = {
Genome databases continue to expand with no change in the basic format of
sequence data. The prevalent use of the classic alignment based search
tools like BLAST have significantly pushed the limits of genome isolate
research. The relatively new frontier of Metagenomic research deals with
thousands of diverse genomes with newer demands beyond the current
homologue search and analysis. Compressing sequence data into a complex
form could facilitate a broader range of sequence analyses. To this end,
this research explores reorganizing sequence data as complex Markov
signatures also known as Extensible Markov Models. Markov models have
found successful application in biological sequence analysis
applications through small, but important extensions to the original
theory of Markov Chains. Extensible Markov Model (EMM) offers a novel
Quasi-alignment complement to the classic alignment based homologous
sequence search methods like BLAST. EMM based bioinformatic analysis
(EMMBA) incorporates automatic learning which allows the Markov chain
creation dynamically. Oligonucleotide or genomic word frequencies form
the core sequence data in alignment free methods. EMMBA extends the
Karlin-Altschul statistics to bring forth an analogous E-Score
statistical significance to the quasi-alignment domain. By consolidating
a community of sequences into a single searchable profile, EMM
methodology further reduces the search space for classification. Through
dynamic generation of the score matrix for each community profile, EMMBA
fine tunes the score assignments. Each evaluation iteratively adjusts the
profile score matrix to account for point probabilities of the query to
ensure Karlin-Altschul assumptions are satisfied to derive meaningful
statistical significance. The presence of multiple quasi-alignments
resembles multiple local alignments of BLAST. Quasi-alignments are
scored based on a difference distribution of Gumbel scores. Species
signature profiles allow for statistical validation of novel species
identification. Working in EMM transformation space speeds up
classification and generates distance matrix for differentiation. The techniques
and metrics presented are validated using the microbial 16s rRNA sequence data
from NCBI.
},
url = {http://biostats.bepress.com/cobra/ps/art63/},
category = {bioinformatics}
}
% WU Wien research report no. 89 (September 2009) on dissimilarity plots.
@techreport{hahsler:Hahsler2009,
author = {Michael Hahsler and Kurt Hornik},
title = {Dissimilarity Plots: A Visual Exploration Tool for Partitional Clustering},
institution = {Research Report Series, Department of Statistics and Mathematics, Wirtschaftsuniversit{\"a}t Wien},
year = {2009},
type = {Report},
number = {89},
address = {Augasse 2--6, 1090 Wien, Austria},
month = sep,
abstract = {For hierarchical clustering, dendrograms provide convenient
and powerful visualization. Although many visualization methods have
been suggested for partitional clustering, their usefulness
deteriorates quickly with increasing dimensionality of the data and/or
they fail to represent structure between and within clusters
simultaneously. In this paper we extend (dissimilarity) matrix shading
with several reordering steps based on seriation. Both methods,
matrix shading and seriation, have been well-known for a long time.
However, only recent algorithmic improvements allow to use seriation
for larger problems. Furthermore, seriation is used in a novel
stepwise process (within each cluster and between clusters) which
leads to a visualization technique that is independent of the
dimensionality of the data. A big advantage is that it presents the
structure between clusters and the micro-structure within clusters
in one concise plot. This not only allows for judging
cluster quality but also makes mis-specification of the number of clusters
apparent. We give a detailed discussion of the construction of
dissimilarity plots and demonstrate their usefulness with several
examples.},
nopdf = {http://michael.hahsler.net/research/dissplot_workingpaper2009/dissplot.pdf},
url = {http://epub.wu.ac.at/id/eprint/1244},
category = {seriation, visualization}
}
% Journal of Statistical Software article (December 2007) introducing the
% R package TSP.
@article{hahsler:Hahsler2007g,
author = {Michael Hahsler and Kurt Hornik},
title = {{TSP} -- {Infrastructure} for the Traveling Salesperson
Problem},
journal = {Journal of Statistical Software},
year = {2007},
volume = {23},
pages = {1--21},
number = {2},
month = dec,
abstract = {
The traveling salesperson (or, salesman) problem (TSP) is a well known and
important combinatorial optimization problem. The goal is to find the
shortest tour that visits each city in a given list exactly once and then
returns to the starting city. Despite this simple problem statement,
solving the TSP is difficult since it belongs to the class of NP-complete
problems. The importance of the TSP arises besides from its theoretical
appeal from the variety of its applications. Typical applications in
operations research include vehicle routing, computer wiring, cutting
wallpaper and job sequencing. The main application in statistics is
combinatorial data analysis, e.g., reordering rows and columns of data
matrices or identifying clusters. In this paper we introduce the
R~package TSP which provides a basic infrastructure for
handling and solving the traveling salesperson problem. The package
features S3 classes for specifying a TSP and its (possibly optimal)
solution as well as several heuristics to find good solutions. In addition,
it provides an interface to Concorde, one of the best exact TSP solvers
currently available.},
issn = {1548-7660},
url = {http://www.jstatsoft.org/v23/i02},
nopdf = {http://michael.hahsler.net/research/TSP_jss2007/v23i02/v23i02.pdf},
category = {seriation, visualization}
}
% Journal of Statistical Software article (March 2008) introducing the
% R package seriation.
@article{hahsler:Hahsler2008,
author = {Michael Hahsler and Kurt Hornik and Christian Buchta},
title = {Getting Things in Order: An Introduction to the {R}
Package seriation},
journal = {Journal of Statistical Software},
year = {2008},
volume = {25},
pages = {1--34},
number = {3},
month = mar,
abstract = {Seriation, i.e., finding a linear order for a set of objects
given data and a loss or merit function, is a basic problem in data
analysis. Caused by the problem's combinatorial nature, it is hard
to solve for all but very small sets. Nevertheless, both exact
solution methods and heuristics are available. In this paper we
present the package~seriation which provides the infrastructure for
seriation with R. The infrastructure comprises data structures to
represent linear orders as permutation vectors, a wide array of
seriation methods using a consistent interface, a method to calculate
the value of various loss and merit functions, and several
visualization techniques which build on seriation. To illustrate how
easily the package can be applied for a variety of applications, a
comprehensive collection of examples is presented.},
issn = {1548-7660},
url = {http://www.jstatsoft.org/v25/i03},
nopdf = {http://michael.hahsler.net/research/seriation_JSS2008/seriation.pdf},
category = {seriation, visualization}
}
% WU Wien research report no. 58 (August 2007) introducing the R package
% seriation.
@techreport{hahsler:Hahsler2007e,
author = {Michael Hahsler and Kurt Hornik and Christian Buchta},
title = {Getting Things in Order: An Introduction to the {R} package seriation},
institution = {Research Report Series, Department of Statistics and Mathematics,
Wirtschaftsuniversit{\"a}t Wien},
year = {2007},
type = {Report},
number = {58},
address = {Augasse 2--6, 1090 Wien, Austria},
month = aug,
abstract = { Seriation, i.e., finding a linear order for a set of objects
given data and a loss or merit function, is a basic problem in data
analysis. Caused by the problem's combinatorial nature, it is
hard to solve for all but very small sets. Nevertheless, both exact
solution methods and heuristics are available. In this paper we
present the package seriation which provides the infrastructure for
seriation with R. The infrastructure comprises data structures to
represent linear orders as permutation vectors, a wide array of
seriation methods using a consistent interface, a method to calculate
the value of various loss and merit functions, and several
visualization techniques which build on seriation. To illustrate how
easily the package can be applied for a variety of applications, a
comprehensive collection of examples is presented. },
nopdf = {http://michael.hahsler.net/research/seriation_working2007/seriation.pdf},
url = {http://epub.wu.ac.at/id/eprint/852},
category = {seriation, visualization}
}
% GfKl 2006 proceedings paper (published 2007) on estimating willingness to
% pay for music downloads with adaptive conjoint analysis.
% Accented letters are written as brace-wrapped BibTeX special characters.
@inproceedings{hahsler:Hahsler2007b,
author = {Christoph Breidert and Michael Hahsler},
title = {Adaptive Conjoint Analysis for Pricing Music Downloads},
booktitle = {Advances in Data Analysis, Proceedings of the 30th Annual Conference
of the Gesellschaft f{\"u}r Klassifikation e.V., Freie Universit{\"a}t
Berlin, March 8--10, 2006},
year = {2007},
pages = {409--416},
editor = {R. Decker and H.-J. Lenz},
series = {Studies in Classification, Data Analysis, and Knowledge Organization},
publisher = {Springer-Verlag},
abstract = {Finding the right pricing for music downloads is of ample importance
to the recording industry and music download service providers. For
the recently introduced music downloads, reference prices are still
developing and to find a revenue maximizing pricing scheme is a challenging
task. The most commonly used approach is to employ linear pricing
(e.g., iTunes, musicload). Lately, subscription models have emerged,
offering their customers unlimited access to streaming music for
a monthly fee (e.g., Napster, RealNetworks). However, other pricing
strategies could also be used, such as quantity rebates starting
at certain download volumes. Research has been done in this field
and Buxmann et al. (2005) have shown that price cuts can improve
revenue. In this paper we apply different approaches to estimate
consumer's willingness to pay (WTP) for music downloads and compare
our findings with the pricing strategies currently used in the market.
To make informed decisions about pricing, knowledge about the consumer's
WTP is essential. Three approaches based on adaptive conjoint analysis
to estimate the WTP for bundles of music downloads are compared.
Two of the approaches are based on a status-quo product (at market
price and alternatively at an individually self-stated price), the
third approach uses a linear model assuming a fixed utility per title.
All three methods seem to be robust and deliver reasonable estimations
of the respondent's WTPs. However, all but the linear model need
an externally set price for the status-quo product which can introduce
a bias.},
pdf = {http://michael.hahsler.net/research/conjoint_gfkl2006/conjoint_music.pdf},
url = {http://dx.doi.org/10.1007/978-3-540-70981-7},
category = {marketing}
}
% GfKl 2006 proceedings paper (published 2007) on building applications on
% top of the arules infrastructure in R.
% Accented letters are written as brace-wrapped BibTeX special characters.
@inproceedings{hahsler:Hahsler2007,
author = {Michael Hahsler and Kurt Hornik},
title = {Building on the arules Infrastructure for Analyzing Transaction Data
with {R}},
booktitle = {Advances in Data Analysis, Proceedings of the 30th Annual Conference
of the Gesellschaft f{\"u}r Klassifikation e.V., Freie Universit{\"a}t
Berlin, March 8--10, 2006},
pages = {449--456},
year = {2007},
editor = {R. Decker and H.-J. Lenz},
series = {Studies in Classification, Data Analysis, and Knowledge Organization},
publisher = {Springer-Verlag},
abstract = {The free and extensible statistical computing environment R with its
enormous number of extension packages already provides many state-of-the-art
techniques for data analysis. Support for association rule mining,
a popular exploratory method which can be used, among other purposes,
for uncovering cross-selling opportunities in \emph{market baskets,}
has become available recently with the R extension package~arules.
After a brief introduction to transaction data and association rules,
we present the formal framework implemented in arules and demonstrate
how clustering and association rule mining can be applied together
using a market basket data set from a typical retailer. This paper
shows that implementing a basic infrastructure with formal classes
in R provides an extensible basis which can very efficiently be employed
for developing new applications (such as clustering transactions)
in addition to association rule mining.},
pdf = {http://michael.hahsler.net/research/arules_gfkl2006/arules_gfkl2006.pdf},
url = {http://dx.doi.org/10.1007/978-3-540-70981-7},
category = {association rules}
}
@article{hahsler:Hahsler2007c,
author = {Michael Hahsler and Kurt Hornik},
title = {New Probabilistic Interest Measures for Association Rules},
journal = {Intelligent Data Analysis},
volume = {11},
number = {5},
pages = {437--455},
year = {2007},
issn = {1088-467X},
url = {http://iospress.metapress.com/openurl.asp?genre=article&issn=1088-467X&volume=11&issue=5&spage=437},
pdf = {http://michael.hahsler.net/research/hyperConfidence_IDA2007/hyperConfidence.pdf},
category = {association rules},
abstract = {Mining association rules is an important technique for discovering
meaningful patterns in transaction databases. Many different measures
of interestingness have been proposed for association rules. However,
these measures fail to take the probabilistic properties of the mined
data into account. In this paper, we start with presenting a simple
probabilistic framework for transaction data which can be used to
simulate transaction data when no associations are present. We use
such data and a real-world database from a grocery outlet to explore
the behavior of confidence and lift, two popular interest measures
used for rule mining. The results show that confidence is systematically
influenced by the frequency of the items in the left hand side of
rules and that lift performs poorly to filter random noise in transaction
data. Based on the probabilistic framework we develop two new interest
measures, hyper-lift and hyper-confidence, which can be used to filter
or order mined association rules. The new measures show significantly
better performance than lift for applications where spurious rules
are problematic. }
}
@article{hahsler:Hahsler2007d,
author = {Michael Hahsler and Christian Buchta and Kurt Hornik},
title = {Selective Association Rule Generation},
journal = {Computational Statistics},
year = {2008},
volume = {23},
pages = {303--315},
number = {2},
month = apr,
doi = {10.1007/s00180-007-0062-z},
url = {http://dx.doi.org/10.1007/s00180-007-0062-z},
abstract = {Mining association rules is a popular and well researched
method for discovering interesting relations between variables in
large databases. A practical problem is that at medium to low support
values often a large number of frequent itemsets and an even larger
number of association rules are found in a database. A widely used
approach is to gradually increase minimum support and minimum
confidence or to filter the found rules using increasingly strict
constraints on additional measures of interestingness until the set of
rules found is reduced to a manageable size. In this paper we describe
a different approach which is based on the idea to first define a set
of ``interesting'' itemsets (e.g., by a mixture of mining and expert
knowledge) and then, in a second step to selectively generate rules
for only these itemsets. The main advantage of this approach over
increasing thresholds or filtering rules is that the number of rules
found is significantly reduced while at the same time it is not
necessary to increase the support and confidence thresholds which
might lead to missing important information in the database.
},
issn = {0943-4062},
pdf = {http://michael.hahsler.net/research/ruleGeneration_cost2007/ruleInduction_CompStat.pdf},
category = {association rules}
}
@article{hahsler:Reutterer2007,
author = {Thomas Reutterer and Michael Hahsler and Kurt Hornik},
title = {{Data Mining und Marketing am Beispiel der explorativen Warenkorbanalyse}},
journal = {{Marketing ZFP}},
year = {2007},
volume = {29},
number = {3},
pages = {165--181},
abstract = {Techniken des Data Mining stellen f\"ur die Marketingforschung
und {}-praxis eine zunehmend bedeutsamere Bereicherung des
herk\"ommlichen Methodenarsenals dar. Mit dem Einsatz solcher
prim\"ar datengetriebener Analysewerkzeuge wird das Ziel verfolgt,
marketingrelevante Informationen ``intelligent'' aus
gro{\ss}en Datenbanken (sog. Data Warehouses) zu extrahieren und
f\"ur die weitere Entscheidungsvorbereitung in geeigneter Form
aufzubereiten. Im vorliegenden Beitrag werden Ber\"uhrungspunkte
zwischen Data Mining und Marketing diskutiert und der konkrete
Einsatz ausgew\"ahlter Data{}-Mining{}-Methoden am Beispiel der
explorativen Warenkorb{}- bzw. Sortimentsverbundanalyse f\"ur einen
Transaktionsdatensatz aus dem Lebensmitteleinzelhandel demonstriert.
Zur Anwendung gelangen dabei Techniken aus dem Bereich der
klassischen Affinit\"atsanalyse, ein \textit{K}{}-Medoid{}-Verfahren
der Clusteranalyse sowie Werkzeuge zur Generierung und
anschlie{\ss}enden Beurteilung von Assoziationsregeln zwischen im
Sortiment enthaltenen Warengruppen. Die Vorgehensweise wird dabei
anhand des mit der Statistik{}-Software R frei verf\"ugbaren
Erweiterungspakets \textbf{arules} illustriert.
},
url = {http://vahlen.becksche.de/zeitschriften/},
category = {association rules, marketing}
}
@techreport{hahsler:Hahsler2006g,
author = {Michael Hahsler and Kurt Hornik},
title = {{TSP} -- {Infrastructure} for the Traveling
Salesperson Problem},
institution = {Research Report Series, Department of Statistics and Mathematics,
Wirtschaftsuniversit{\"a}t Wien},
year = {2006},
type = {Report},
number = {45},
address = {Augasse 2--6, 1090 Wien, Austria},
month = dec,
abstract = {The traveling salesperson or salesman problem (TSP) is a well
known and important combinatorial optimization problem. The goal is to
find the shortest tour that visits each city in a given list exactly
once and then returns to the starting city. Despite this simple
problem statement, solving the TSP is difficult since it belongs to
the class of NP-complete problems. The importance of the TSP arises
besides from its theoretical appeal from the variety of its
applications. In addition to vehicle routing, many other
applications, e.g., computer wiring, cutting wallpaper, job
sequencing or several data visualization techniques, require the
solution of a TSP. In this paper we introduce the R package TSP
which provides a basic infrastructure for handling and solving the
traveling salesperson problem. The package features S3 classes for
specifying a TSP and its (possibly optimal) solution as well as
several heuristics to find good solutions. In addition, it provides
an interface to Concorde, one of the best exact TSP solvers currently
available.},
nopdf = {http://michael.hahsler.net/research/TSP_working2006/TSP.pdf},
url = {http://epub.wu.ac.at/id/eprint/1230},
category = {seriation, visualization}
}
@article{hahsler:Hahsler2006e,
author = {Christoph Breidert and Michael Hahsler and Thomas Reutterer},
title = {A Review of Methods for Measuring Willingness-to-Pay},
journal = {Innovative Marketing},
volume = {2},
number = {4},
pages = {8--32},
year = {2006},
issn = {1814-2427},
url = {http://www.businessperspectives.org/en/journalim},
pdf = {http://michael.hahsler.net/research/wtp_innovative_marketing2006/wtp_breidert_hahsler_reutterer_preprint.pdf},
category = {marketing},
abstract = {Knowledge about a product's willingness-to-pay on behalf of its (potential)
customers plays a crucial role in many areas of marketing management
like pricing decisions or new product development. Numerous approaches
to measure willingness-to-pay with differential conceptual foundations
and methodological implications have been presented in the relevant
literature so far. This article provides the reader with a systematic
overview of the relevant literature on these competing approaches
and associated schools of thought, recognizes their respective merits
and discusses obstacles and issues regarding their adoption to measuring
willingness-to-pay. Because of its practical relevance, special focus
will be put on indirect surveying techniques and, in particular,
conjoint-based applications will be discussed in more detail. The
strengths and limitations of the individual approaches are discussed
and evaluated from a managerial point of view.}
}
@article{hahsler:Hahsler2006a,
author = {Michael Hahsler},
title = {A Model-Based Frequency Constraint for Mining Associations from Transaction
Data},
journal = {Data Mining and Knowledge Discovery},
year = {2006},
volume = {13},
pages = {137--166},
number = {2},
month = sep,
abstract = {Mining frequent itemsets is a popular method for finding associated
items in databases. For this method, support, the co-occurrence frequency
of the items which form an association, is used as the primary indicator
of the associations's significance. A single user-specified support
threshold is used to decided if associations should be further investigated.
Support has some known problems with rare items, favors shorter itemsets
and sometimes produces misleading associations. In this paper we
develop a novel model-based frequency constraint as an alternative
to a single, user-specified minimum support. The constraint utilizes
knowledge of the process generating transaction data by applying
a simple stochastic mixture model (the NB model) which allows for
transaction data's typically highly skewed item frequency distribution.
A user-specified precision threshold is used together with the model
to find local frequency thresholds for groups of itemsets. Based
on the constraint we develop the notion of NB-frequent itemsets and
adapt a mining algorithm to find all NB-frequent itemsets in a database.
In experiments with publicly available transaction databases we show
that the new constraint provides improvements over a single minimum
support threshold and that the precision threshold is more robust
and easier to set and interpret by the user. },
doi = {10.1007/s10618-005-0026-2},
issn = {1384-5810},
pdf = {http://michael.hahsler.net/research/nbd_dami2005/nbd_associationrules_dami2005.pdf},
url = {http://dx.doi.org/10.1007/s10618-005-0026-2},
category = {association rules}
}
@techreport{hahsler:Hahsler2006c,
author = {Michael Hahsler and Kurt Hornik},
title = {New Probabilistic Interest Measures for Association Rules},
institution = {Research Report Series, Department of Statistics and Mathematics,
Wirtschaftsuniversit{\"a}t Wien},
year = {2006},
type = {Report},
number = {38},
address = {Augasse 2--6, 1090 Wien, Austria},
month = aug,
abstract = { Mining association rules is an important technique for discovering
meaningful patterns in transaction databases. Many different measures
of interestingness have been proposed for association rules. However,
these measures fail to take the probabilistic properties of the mined
data into account. In this paper, we start with presenting a simple
probabilistic framework for transaction data which can be used to
simulate transaction data when no associations are present. We use
such data and a real-world database from a grocery outlet to explore
the behavior of confidence and lift, two popular interest measures
used for rule mining. The results show that confidence is systematically
influenced by the frequency of the items in the left hand side of
rules and that lift performs poorly to filter random noise in transaction
data. Based on the probabilistic framework we develop two new interest
measures, hyper-lift and hyper-confidence, which can be used to filter
or order mined association rules. The new measures show significant
better performance than lift for applications where spurious rules
are problematic. },
nopdf = {http://michael.hahsler.net/research/arules_working2006/hyperConfidence.pdf},
url = {http://epub.wu.ac.at/id/eprint/1286},
category = {association rules}
}
@incollection{hahsler:Hahsler2006f,
author = {Michael Hahsler and Kurt Hornik and Thomas Reutterer},
title = {{Warenkorbanalyse mit Hilfe der Statistik-Software R}},
booktitle = {Innovationen in Marketing},
editor = {Peter Schnedlitz and Renate Buber and Thomas Reutterer and Arnold
Schuh and Christoph Teller},
pages = {144--163},
publisher = {Linde-Verlag},
year = {2006},
url = {http://www.lindeverlag.at/verlag/buecher/978-3-7143-0080-2},
pdf = {http://michael.hahsler.net/research/arules_WUCompDay2006/arules.pdf},
category = {association rules, marketing},
abstract = {Die Warenkorb- oder Sortimentsverbundanalyse bezeichnet eine Reihe
von Methoden zur Untersuchung der bei einem Einkauf gemeinsam nachgefragten
Produkte oder Kategorien aus einem Handelssortiment. In diesem Beitrag
wird die explorative Warenkorbanalyse n{\"a}her beleuchtet, welche eine
Verdichtung und kompakte Darstellung der in (zumeist sehr umfangreichen)
Transaktionsdaten des Einzelhandels auffindbaren Verbundbeziehungen
beabsichtigt. Mit einer enormen Anzahl an verf{\"u}gbaren Erweiterungspaketen
bietet sich die frei verf{\"u}gbare Statistik-Software R als ideale Basis
f{\"u}r die Durchf{\"u}hrung solcher Warenkorbanalysen an. Die im Erweiterungspaket
arules vorhandene Infrastruktur f{\"u}r Transaktionsdaten stellt eine
flexible Basis f{\"u}r die Warenkorbanalyse bereit. Unterst{\"u}tzt wird
die effiziente Darstellung, Bearbeitung und Analyse von Warenkorbdaten
mitsamt beliebigen Zusatzinformationen zu Produkten (zum Beispiel
Sortimentshierarchie) und zu Transaktionen (zum Beispiel Umsatz oder
Deckungsbeitrag). Das Paket ist nahtlos in R integriert und erm{\"o}glicht
dadurch die direkte Anwendung von bereits vorhandenen modernsten
Verfahren f{\"u}r Sampling, Clusterbildung und Visualisierung von Warenkorbdaten.
Zus{\"a}tzlich sind in arules g{\"a}ngige Algorithmen zum Auffinden von Assoziationsregeln
und die notwendigen Datenstrukturen zur Analyse von Mustern vorhanden.
Eine Auswahl der wichtigsten Funktionen wird anhand eines realen
Transaktionsdatensatzes aus dem Lebensmitteleinzelhandel demonstriert.}
}
@inproceedings{hahsler:Hahsler2006b,
author = {Michael Hahsler and Kurt Hornik and Thomas Reutterer},
title = {Implications of Probabilistic Data Modeling for Mining Association
Rules},
booktitle = {From Data and Information Analysis to Knowledge Engineering, Proceedings
of the 29th Annual Conference of the Gesellschaft f{\"u}r Klassifikation
e.V., University of Magdeburg, March 9--11, 2005},
editor = {M. Spiliopoulou and R. Kruse and C. Borgelt and A. N{\"u}rnberger
and W. Gaul},
series = {Studies in Classification, Data Analysis, and Knowledge Organization},
pages = {598--605},
publisher = {Springer-Verlag},
year = {2006},
url = {http://www.springerlink.com/content/978-3-540-31314-4/},
pdf = {http://michael.hahsler.net/research/probRuleMining_gfkl2005/probRuleMining_gfkl2005.pdf},
category = {association rules},
abstract = {Mining association rules is an important technique for discovering
meaningful patterns in transaction databases. In the current literature,
the properties of algorithms to mine association rules are discussed
in great detail. We present a simple probabilistic framework for
transaction data which can be used to simulate transaction data when
no associations are present. We use such data and a real-world grocery
database to explore the behavior of confidence and lift, two popular
interest measures used for rule mining. The results show that confidence
is systematically influenced by the frequency of the items in the
left-hand-side of rules and that lift performs poorly to filter random
noise in transaction data. The probabilistic data modeling approach
presented in this paper not only is a valuable framework to analyze
interest measures but also provides a starting point for further
research to develop new interest measures which are based on statistical
tests and geared towards the specific properties of transaction data.}
}
@inproceedings{hahsler:Breidert2005,
author = {Christoph Breidert and Michael Hahsler and Lars Schmidt-Thieme},
title = {Reservation Price Estimation by Adaptive Conjoint Analysis},
booktitle = {Classification - the Ubiquitous Challenge, Proceedings of the 28th
Annual Conference of the Gesellschaft f{\"u}r Klassifikation e.V.,
University of Dortmund, March 9--11, 2004},
editor = {Weihs, Claus and Gaul, Wolfgang},
series = {Studies in Classification, Data Analysis, and Knowledge Organization},
pages = {577--584},
publisher = {Springer-Verlag},
year = {2005},
url = {http://www.springerlink.com/content/978-3-540-28084-2/},
pdf = {http://michael.hahsler.net/research/reservation_gfkl2004/gfkl2004.pdf},
category = {marketing},
abstract = {Though reservation prices are needed for many business decision processes,
e.g., pricing new products, it often turns out to be difficult to
measure them. Many researchers reuse conjoint analysis data with
price as an attribute for this task (e.g., Kohli and Mahajan (1991)).
In this setting the information if a consumer buys a product at all
is not elicited which makes reservation price estimation impossible.
We propose an additional interview scene at the end of the adaptive
conjoint analysis (Johnson (1987)) to estimate reservation prices
for all product configurations. This will be achieved by the usage
of product stimuli as well as price scales that are adapted for each
proband to reflect individual choice behavior. We present preliminary
results from an ongoing large-sample conjoint interview of customers
of a major mobile phone retailer in Germany.}
}
@inproceedings{hahsler:Fessler2005,
author = {Georg Fessler and Michael Hahsler and Michaela Putz},
title = {{ePubWU -- Erfahrungen mit einer Volltext an der Wirtschaftsuniversit{\"a}t
Wien}},
booktitle = {Bibliotheken -- Fundament der Bildung, 28. {\"O}sterreichischer Bibliothekartag
2004},
year = {2005},
editor = {Christian Enichlmayr},
series = {Schriftenreihe der O{\"o}. Landesbibliothek},
pages = {190--193},
abstract = {ePubWU ist eine elektronische Plattform f\"ur wissenschaftliche Publikationen
der Wirtschaftsuniversit\"at Wien, wo forschungsbezogene Ver\"offentlichungen
der WU im Volltext \"uber das WWW zug\"anglich gemacht werden. ePubWU
wird als Gemeinschaftsprojekt der Universit\"atsbibliothek der Wirtschaftsuniversit\"at
Wien und der Abteilung f\"ur Informationswirtschaft betrieben. Derzeit
werden in ePubWU zwei Publikationsarten gesammelt - Working Papers
und Dissertationen. In dem Beitrag werden Erfahrungen der \"uber zweij\"ahrigen
Laufzeit des Projektes dargestellt, u.a. in den Bereichen Akquisition,
Workflows, Erschlie{\ss}ung, Vermittlung.},
isbn = {3-85252-684-1},
category = {digital libraries}
}
@incollection{hahsler:Hahsler2004a,
author = {Michael Hahsler},
title = {A Quantitative Study of the Adoption of Design Patterns by Open Source
Software Developers},
booktitle = {Free/Open Source Software Development},
editor = {S. Koch},
pages = {103--123},
publisher = {Idea Group Publishing},
year = {2005},
url = {http://www.idea-group.com/books/details.asp?id=4368},
pdf = {http://michael.hahsler.net/research/patterns_oss2004/OSS_patterns_preprint.pdf},
category = {software engineering},
abstract = {Several successful projects (Linux, Free-BSD, BIND, Apache, etc.)
showed that the collaborative and self-organizing process of developing
open source software produces reliable, high quality software. Without
doubt, the open source software development process differs in many
ways from the traditional development process in a commercial environment.
An interesting research question is how these differences influence
the adoption of traditional software engineering practices. In this
chapter we investigate how design patterns, a widely accepted software
engineering practice, are adopted by open source developers for documenting
changes. We analyze the development process of almost 1,000 open
source software projects using version control information and explore
differences in pattern adoption using characteristics of projects
and developers. By analyzing these differences we provide evidence
that design patterns are an important practice in open source projects
and that there exist significant differences between developers who
use design patterns and who do not.}
}
@inproceedings{hahsler:Hahsler2005e,
author = {Michael Hahsler},
title = {Optimizing Web Sites for Customer Retention},
booktitle = {Proceedings of the 2005 International Workshop on Customer Relationship
Management: Data Mining Meets Marketing, November 18--19, 2005, New
York City, USA},
editor = {Bing Liu and Myra Spiliopoulou and Jaideep Srivastava and Alex Tuzhilin},
year = {2005},
pdf = {http://michael.hahsler.net/research/LSD_CRM2005/LSD_CRM2005.pdf},
category = {marketing, recommender systems},
abstract = {With customer relationship management (CRM) companies move away from
a mainly product-centered view to a customer-centered view. Resulting
from this change, the effective management of how to keep contact
with customers throughout different channels is one of the key success
factors in today's business world. Company Web sites have evolved
in many industries into an extremely important channel through which
customers can be attracted and retained. To analyze and optimize
this channel, accurate models of how customers browse through the
Web site and what information within the site they repeatedly view
are crucial. Typically, data mining techniques are used for this
purpose. However, there already exist numerous models developed in
marketing research for traditional channels which could also prove
valuable to understanding this new channel. In this paper we propose
the application of an extension of the Logarithmic Series Distribution
(LSD) model repeat-usage of Web-based information and thus to analyze
and optimize a Web Site's capability to support one goal of CRM,
to retain customers. As an example, we use the university's blended
learning web portal with over a thousand learning resources to demonstrate
how the model can be used to evaluate and improve the Web site's
effectiveness.}
}
@techreport{hahsler:Hahsler2005c,
author = {Michael Hahsler and Bettina Gr{\"u}n and Kurt Hornik},
title = {A Computational Environment for Mining Association Rules and Frequent
Item Sets},
institution = {Research Report Series, Department of Statistics and Mathematics,
Wirtschaftsuniversit{\"a}t Wien},
year = {2005},
type = {Report},
number = {15},
address = {Augasse 2--6, 1090 Wien, Austria},
month = apr,
abstract = { Mining frequent itemsets and association rules is a popular and well
researched approach to discovering interesting relationships between
variables in large databases. The R package arules presented in this
paper provides a basic infrastructure for creating and manipulating
input data sets and for analyzing the resulting itemsets and rules.
The package also includes interfaces to two fast mining algorithms,
the popular C implementations of Apriori and Eclat by Christian Borgelt.
These algorithms can be used to mine frequent itemsets, maximal frequent
itemsets, closed frequent itemsets and association rules. },
nopdf = {http://michael.hahsler.net/research/arules_workingpaper15_2005/arules.pdf},
url = {http://epub.wu.ac.at/id/eprint/132},
category = {association rules}
}
@article{hahsler:Hahsler2005f,
author = {Michael Hahsler and Bettina Gr{\"u}n and Kurt Hornik},
title = {arules -- {A} Computational Environment for Mining Association Rules
and Frequent Item Sets},
journal = {Journal of Statistical Software},
year = {2005},
volume = {14},
pages = {1--25},
number = {15},
month = oct,
abstract = {Mining frequent itemsets and association rules is a popular and well
researched approach for discovering interesting relationships between
variables in large databases. The R package arules presented in this
paper provides a basic infrastructure for creating and manipulating
input data sets and for analyzing the resulting itemsets and rules.
The package also includes interfaces to two fast mining algorithms,
the popular C implementations of Apriori and Eclat by Christian Borgelt.
These algorithms can be used to mine frequent itemsets, maximal frequent
itemsets, closed frequent itemsets and association rules.},
issn = {1548-7660},
pdf = {http://michael.hahsler.net/research/arules_jss2005/v14i15.pdf},
url = {http://www.jstatsoft.org/v14/i15},
category = {association rules}
}
@techreport{hahsler:Hahsler2005b,
author = {Michael Hahsler and Kurt Hornik and Thomas Reutterer},
title = {Implications of Probabilistic Data Modeling for Rule Mining},
institution = {Research Report Series, Department of Statistics and Mathematics,
Wirtschaftsuniversit{\"a}t Wien},
year = {2005},
type = {Report},
number = {14},
address = {Augasse 2--6, 1090 Wien, Austria},
month = mar,
abstract = { Mining association rules is an important technique for discovering
meaningful patterns in transaction databases. In the current literature,
the properties of algorithms to mine associations are discussed in
great detail. In this paper we investigate properties of transaction
data sets from a probabilistic point of view. We present a simple
probabilistic framework for transaction data and its implementation
using the R statistical computing environment. The framework can
be used to simulate transaction data when no associations are present.
We use such data to explore the ability to filter noise of confidence
and lift, two popular interest measures used for rule mining. Based
on the framework we develop the measure hyperlift and we compare
this new measure to lift using simulated data and a real-world grocery
database. },
nopdf = {http://michael.hahsler.net/research/probDataMining_wp2005/hyperlift.pdf},
url = {http://epub.wu.ac.at/id/eprint/764},
category = {association rules}
}
@inproceedings{hahsler:Hahsler2005,
author = {Michael Hahsler and Stefan Koch},
title = {Discussion of a large-scale open source data collection methodology},
booktitle = {38th Annual Hawaii International Conference on System Sciences (HICSS'05),
January 3--6, 2005 Hilton Waikoloa Village, Big Island, Hawaii},
publisher = {IEEE Computer Society Press},
year = {2005},
url = {http://csdl.computer.org/comp/proceedings/hicss/2005/2268/07/22680197babs.htm},
pdf = {http://michael.hahsler.net/research/oss_hicss2005/oss_hicss2005.pdf},
category = {software engineering},
abstract = { In this paper we discusses in detail a possible methodology for collecting
repository data on a large number of open source software projects
from a single project hosting and community site. The process of
data retrieval is described along with the possible metrics that
can be computed and which can be used for further analyses. Example
research areas to be addressed with the available data and first
results are given. Then, both advantages and disadvantages of the
proposed methodology are discussed together with implications for
future approaches.}
}
@misc{hahsler:Fessler2003,
author = {Georg Fessler and Michael Hahsler and Michaela Putz and Judith Schwarz
and Brigitta Wiebogen},
title = {{Projektbericht ePubWU 2001--2003}},
howpublished = {Augasse 2--6, 1090 Wien, Wirtschaftsuniversit{\"a}t Wien},
institution = {Wirtschaftsuniversit{\"a}t Wien},
address = {Augasse 2--6, 1090 Wien},
month = jan,
year = {2004},
pdf = {http://michael.hahsler.net/research/ePub_bericht_2004/ePub-Projektbericht_01-03.pdf},
category = {digital libraries},
abstract = {ePubWU ist eine elektronische Plattform f{\"u}r wissenschaftliche
Publikationen der Wirtschaftsuniversit{\"a}t Wien, wo forschungsbezogene
Ver{\"o}ffentlichungen der WU im Volltext {\"u}ber das WWW zug{\"a}nglich
gemacht werden. ePubWU ist seit J{\"a}nner 2002 im Echtbetrieb und
wird als Gemeinschaftsprojekt der Universit{\"a}tsbibliothek der
Wirtschaftsuniversit{\"a}t Wien und der Abteilung f{\"u}r Informationswirtschaft
betrieben. Dieser Bericht beinhaltet die Erfahrungen aus der 2-j{\"a}hrigen
Pilotphase des Projekts.}
}
@techreport{hahsler:Hafner2004,
author = {Susanne Hafner and Michael Hahsler},
title = {{Preisvergleich zwischen Online-Shops und traditionellen Gesch{\"a}ften:
Fallstudie Spieleeinzelhandel}},
institution = {Working Papers on Information Processing and Information Management,
Institut f{\"u}r Informationsverarbeitung und -wirtschaft, Wirtschaftsuniversit{\"a}t
Wien},
type = {Working Paper},
number = {04/2004},
address = {Augasse 2--6, 1090 Wien, Austria},
month = aug,
year = {2004},
url = {http://epub.wu.ac.at/id/eprint/828},
nopdf = {http://michael.hahsler.net/research/pricing_study_working2004/pricing_study_WP.pdf},
category = {marketing},
abstract = { Die vorliegende Arbeit besch{\"a}ftigt sich mit dem Preisvergleich
zwischen Online-Shops und traditionellen Gesch{\"a}ften. In einigen
Studien wurde bisher versucht Preisunterschiede zwischen online und
traditionellen Gesch{\"a}ften nachzuweisen, um die These, dass Online-M{\"a}rkte
aufgrund h{\"o}herer Transparenz und niedrigerer Transaktionskosten
effizienter sind, zu best{\"a}tigen. Studien untersuchten bisher
Produktgruppen wie CDs und B{\"u}cher. In dieser Studie besch{\"a}ftigen
wir uns mit dem bisher noch nicht untersuchten Spieleeinzelhandel
und konzentrieren uns dabei auf den {\"o}sterreichischen Markt. Es
soll untersucht werden, ob der {\"o}sterreichische Markt {\"a}hnliche
oder andere Ergebnisse liefert als die bisher untersuchten M{\"a}rkte
(haupts{\"a}chlich im nordamerikanischer Raum). Die Untersuchung
zeigt folgendes: Die Preise f{\"u}r Spiele sind im elektronischen
Markt um ca. 20 Prozent niedriger als im traditionellen Markt. Die
Preisstreuungen im elektronischen und traditionellen Markt unterscheiden
sich nicht signifikant. Beide Ergebnisse decken sich mit den Ergebnissen
anderer Studien. Damit ist der {\"o}sterreichische Online-Brettspieleinzelhandel
{\"a}hnlich entwickelt wie der Online-Handel in anderen L{\"a}ndern
und f{\"u}r andere Produktgruppen. }
}
@techreport{hahsler:Hahsler2004c,
author = {Michael Hahsler},
title = {A Model-Based Frequency Constraint for Mining Associations from Transaction
Data},
institution = {Working Papers on Information Processing and Information Management,
Institut f{\"u}r Informationsverarbeitung und -wirtschaft, Wirtschaftsuniversit{\"a}t
Wien},
type = {Working Paper},
number = {07/2004},
address = {Augasse 2--6, 1090 Wien, Austria},
month = nov,
year = {2004},
url = {http://epub.wu.ac.at/id/eprint/1760},
nopdf = {http://michael.hahsler.net/research/nbd_working2004/nbd_associationrules_WP.pdf},
category = {association rules},
abstract = { In this paper we develop an alternative to minimum support which
utilizes knowledge of the process which generates transaction data
and allows for highly skewed frequency distributions. We apply a
simple stochastic model (the NB model), which is known for its usefulness
to describe item occurrences in transaction data, to develop a frequency
constraint. This model-based frequency constraint is used together
with a precision threshold to find individual support thresholds
for groups of associations. We develop the notion of NB-frequent
itemsets and present two mining algorithms which find all NB-frequent
itemsets in a database. In experiments with publicly available transaction
databases we show that the new constraint can provide significant
improvements over a single minimum support threshold and that the
precision threshold is easier to use. }
}
@inproceedings{hahsler:Hahsler2004b,
author = {Michael Hahsler and Stefan Koch},
title = {Cooperation and disruptive behaviour - Learning from a multi-player
Internet gaming community},
booktitle = {IADIS International Conference Web Based Communities 2004, Lisbon,
Portugal, 24--26 March 2004},
editor = {Piet Kommers and Pedro Isaias and Miguel Baptista Nunes},
pages = {35--42},
publisher = {International Association for Development of the Information Society
(IADIS)},
year = {2004},
url = {http://www.iadis.net/dl/Search_list_open.asp?code=730},
pdf = {http://michael.hahsler.net/research/webBasedComm_cs/webBasedComm_cs.pdf},
category = {software engineering},
abstract = { In this paper we report possibilities and experiences from employing
Counter-Strike, a popular multi-player Internet computer game and
its resulting online community in research on cooperative behaviour.
Advantages from using this game include easy availability of rich
data, the emphasis on team-playing, as well as numerous possibilities
to change the experiment settings. We use descriptive game theory
and statistical methods to explore cooperation within the game as
well as the way the player community deals with disruptive behaviour.
After a quick introduction to the basic rules of Counter-Strike,
we describe the setup of the Internet game server used. We then present
empirical results from the game server logs where cooperation within
the game is analyzed from a game theoretic perspective. Finally we
discuss the applications of our results to other online communities,
including cooperation and self-regulation in open source teams.}
}
@inproceedings{hahsler:Bernroider2003a,
author = {Edward Bernroider and Michael Hahsler and Stefan Koch and Volker
Stix},
title = {{Data Envelopment Analysis zur Unterst{\"u}tzung der Auswahl und
Einf{\"u}hrung von ERP-Systemen}},
booktitle = {Informationswirtschaft: Ein Sektor mit Zukunft, Symposium 4.--5.
September 2003, Wien, {\"O}sterreich},
year = {2003},
editor = {Andreas Geyer-Schulz and Alfred Taudes},
series = {Lecture Notes in Informatics (LNI) P-33},
pages = {11--26},
publisher = {Gesellschaft f{\"u}r Informatik},
abstract = {Immer mehr Unternehmen setzen betriebswirtschaftliche Standardsoftwarepakete
wie beispielsweise SAP R/3 oder BaaN ein. Die Auswahl und die Einf{\"u}hrung
solcher Systeme stellt f{\"u}r die meisten Unternehmen ein strategisch
wichtiges IT-Projekt dar, das mit massiven Risiken verbunden ist.
Bei der Auswahl des am besten geeigneten Systems gilt es einen Gruppenentscheidungsprozess
zu unterst{\"u}tzen. Das darauf folgende Einf{\"u}hrungsprojekt muss
effizient, den ``best practices'' entsprechend, durchgef{\"u}hrt
werden. In dieser Arbeit wird anhand von Beispielen aufgezeigt, wie
beide Prozesse - die Auswahl und die Einf{\"u}hrung - durch die Data
Envelopment Analysis unterst{\"u}tzt werden k{\"o}nnen.},
url = {http://www.gi-ev.de/},
category = {marketing}
}
@incollection{hahsler:Geyer-Schulz2003e,
author = {Andreas Geyer-Schulz and Michael Hahsler},
title = {Comparing two Recommender Algorithms with the Help of Recommendations
by Peers},
booktitle = {WEBKDD 2002 -- Mining Web Data for Discovering Usage Patterns and
Profiles, 4th International Workshop, Edmonton, Canada, July 2002,
Revised Papers},
publisher = {Springer-Verlag},
year = {2003},
editor = {O. R. Zaiane and J. Srivastava and M. Spiliopoulou and B. Masand},
series = {Lecture Notes in Computer Science LNAI 2703},
pages = {137--158},
abstract = {Since more and more Web sites, especially sites of retailers, offer
automatic recommendation services using Web usage mining, evaluation
of recommender algorithms has become increasingly important. In this
paper we present a framework for the evaluation of different aspects
of recommender systems based on the process of discovering knowledge
in databases introduced by Fayyad et al. and we summarize research
already done in this area. One aspect identified in the presented
evaluation framework is widely neglected when dealing with recommender
algorithms. This aspect is to evaluate how useful patterns extracted
by recommender algorithms are to support the social process of recommending
products to others, a process normally driven by recommendations
by peers or experts. To fill this gap for recommender algorithms
based on frequent itemsets extracted from usage data we evaluate
the usefulness of two algorithms. The first recommender algorithm
uses association rules, and the other algorithm is based on the repeat-buying
theory known from marketing research. We use 6 months of usage data
from an educational Internet information broker and compare useful
recommendations identified by users from the target group of the
broker (peers) with the recommendations produced by the algorithms.
The results of the evaluation presented in this paper suggest that
frequent itemsets from usage histories match the concept of useful
recommendations expressed by peers with satisfactory accuracy (higher
than 70\%) and precision (between 60\% and 90\%). Also the evaluation
suggests that both algorithms studied in the paper perform similar
on real-world data if they are tuned properly.},
note = {(Revised version of the WEBKDD 2002 paper ``Evaluation of Recommender
Algorithms for an Internet Information Broker based on Simple
Association Rules and on the Repeat-Buying Theory'')},
pdf = {http://michael.hahsler.net/research/recomm_lnai2002/lnai2002.pdf},
url = {http://www.springeronline.com/sgw/cda/frontpage/0,10735,5-146-22-14095354-0,00.html},
category = {recommender systems, association rules}
}
@incollection{hahsler:GeyerSchulz2003c,
  author = {Andreas Geyer-Schulz and Michael Hahsler and Andreas Neumann and
Anke Thede},
  title = {Behavior-Based Recommender Systems as Value-Added Services for Scientific
Libraries},
  booktitle = {Statistical Data Mining \& Knowledge Discovery},
  editor = {Hamparsum Bozdogan},
  pages = {433--454},
  publisher = {Chapman \& Hall / CRC},
  month = jul,
  year = {2003},
  abstract = { Amazon.com paved the way for several large-scale, behavior-based
recommendation services as an important value-added expert advice
service for online book shops. In this contribution we discuss the
effects (and possible reductions of transaction costs) for such services
and investigate how such value-added services can be implemented
in context of scientific libraries. For this purpose we present a
new, recently developed recommender system based on a stochastic
purchase incidence model, present the underlying stochastic model
from repeat-buying theory and analyze whether the underlying assumptions
on consumer behavior holds for users of scientific libraries, too.
We analyzed the logfiles with approximately 85 million HTTP-transactions
of the web-based online public access catalog (OPAC) of the library
of the Universit{\"a}t Karlsruhe (TH) since January 2001 and performed
some diagnostic checks. The recommender service is fully operational
within the library system of the Universit{\"a}t Karlsruhe (TH) since
2002/06/22. },
  url = {http://www.crcpress.com/shopping_cart/products/product_detail.asp?sku=C3448&parent_id=&pc=},
  category = {recommender systems}
}
@inproceedings{hahsler:GeyerSchulz2003d,
author = {Andreas Geyer-Schulz and Michael Hahsler and Andreas Neumann and
Anke Thede},
title = {{Recommenderdienste f{\"u}r wissenschaftliche Bibliotheken und Bibliotheksverb{\"u}nde}},
booktitle = {Informationswirtschaft: Ein Sektor mit Zukunft, Symposium 4.--5.
September 2003, Wien, {\"O}sterreich},
year = {2003},
editor = {Andreas Geyer-Schulz and Alfred Taudes},
series = {Lecture Notes in Informatics (LNI) P-33},
pages = {43--58},
publisher = {Gesellschaft f{\"u}r Informatik},
abstract = {Wissenschaftliche Bibliotheken stellen ein vielversprechendes Anwendungsfeld
f{\"u}r Recommenderdienste dar. Wissenschaftliche Bibliotheken k{\"o}nnen
leicht kundenzentrierte Serviceportale im Stil von amazon.com entwickeln.
Studenten, Universit{\"a}tslehrer und -forscher k{\"o}nnen ihren
Anteil an den Transaktionskosten (z.B. Such- und Bewertungskosten
f{\"u}r Informationsprodukte) reduzieren. F{\"u}r Bibliothekare liegt
der Vorteil in einer Verbesserung der Kundenberatung durch Empfehlungen
und einer zus{\"a}tzlichen Unterst{\"u}tzung bei der Marktforschung,
Produktbewertung und dem Bestandsmanagement. In diesem Beitrag pr{\"a}sentieren
wir eine Strategie, mit der verhaltensbasierte, verteilte Recommenderdienste
in bestehende Bibliothekssysteme mit minimalem Aufwand integriert
werden k{\"o}nnen und berichten {\"u}ber unsere Erfahrungen bei der
Einf{\"u}hrung eines solchen Dienstes an der Universit{\"a}tsbibliothek
der Universit{\"a}t Karlsruhe (TH).},
url = {http://www.gi-ev.de/},
category = {recommender systems}
}
@inproceedings{hahsler:GeyerSchulz2003a,
  author = {Andreas Geyer-Schulz and Michael Hahsler and Andreas Neumann and
Anke Thede},
  title = {An Integration Strategy for Distributed Recommender Services in Legacy
Library Systems},
  booktitle = {Between Data Science and Applied Data Analysis, Proceedings of the
26th Annual Conference of the Gesellschaft f{\"u}r Klassifikation
e.V., University of Mannheim, July 22--24, 2002},
  editor = {M. Schader and W. Gaul and M. Vichi},
  series = {Studies in Classification, Data Analysis, and Knowledge Organization},
  pages = {412--420},
  publisher = {Springer-Verlag},
  month = jul,
  year = {2003},
  abstract = { Scientific library systems are a very promising application area
for recommender services. Scientific libraries could easily develop
customer-oriented service portals in the style of amazon.com. Students,
university teachers and researchers can reduce their transaction
cost (i.e. search and evaluation cost of information products). For
librarians, the advantage is an improvement of the customer support
by recommendations and the additional support in marketing research,
product evaluation, and book selection. In this contribution we present
a strategy for integrating a behavior-based distributed recommender
service in legacy library systems with minimal changes in the legacy
system. },
  url = {http://www.springer.com/east/home/business/business+information+systems?SGWID=5-170-69-173622621-0},
  category = {recommender systems}
}
@inproceedings{hahsler:GeyerSchulz2003b,
author = {Andreas Geyer-Schulz and Michael Hahsler and Anke Thede},
title = {Comparing association-rules and repeat-buying based recommender systems
in a {B2B} environment},
booktitle = {Between Data Science and Applied Data Analysis, Proceedings of the
26th Annual Conference of the Gesellschaft f{\"u}r Klassifikation
e.V., University of Mannheim, July 22--24, 2002},
year = {2003},
editor = {M. Schader and W. Gaul and M. Vichi},
series = {Studies in Classification, Data Analysis, and Knowledge Organization},
pages = {421--429},
month = jul,
publisher = {Springer-Verlag},
abstract = { In this contribution we present a systematic evaluation and comparison
of recommender systems based on simple association rules and on repeat-buying
theory. Both recommender services are based on the customer purchase
histories of a medium-sized B2B-merchant for computer accessories.
With the help of product managers an evaluation set for recommendations
was generated. With regard to this evaluation set, recommendations
produced by both methods are evaluated and several error measures
are computed. This provides an empirical test whether frequent item
sets or outliers of a stochastic purchase incidence model are suitable
concepts for automatically generating recommendations. Furthermore,
the loss function (performance measures) of the two models are compared
and the sensitivity with regard to a misspecification of the model
parameters is discussed. },
url = {http://www.springerlink.com/content/978-3-540-20304-9/},
category = {recommender systems}
}
@techreport{hahsler:Hahsler2003,
author = {Michael Hahsler},
title = {A Quantitative Study of the Application of Design Patterns in {Java}},
institution = {Working Papers on Information Processing and Information Management,
Institut f{\"u}r Informationsverarbeitung und -wirtschaft, Wirtschaftsuniversit{\"a}t
Wien},
year = {2003},
type = {Working Paper},
number = {01/2003},
address = {Augasse 2--6, 1090 Wien, Austria},
month = jan,
abstract = { Using design patterns is a widely accepted method to improve software
development. There are many benefits of the application of patterns
claimed in the literature. The most cited claim is that design patterns
can provide a common design vocabulary and therefore improve greatly
communication between software designers. Most of the claims are
supported by experiences reports of practitioners, but there is a
lack of quantitative research concerning the actual application of
design patterns and about the realization of the claimed benefits.
In this paper we analyze the development process of over 1000 open
source software projects using version control information. We explore
this information to gain an insight into the differences of software
development with and without design patterns. By analyzing these
differences we provide evidence that design patterns are used for
communication and that there is a significant difference between
developers who use design patterns and who do not. },
html = {http://michael.hahsler.net/research/patterns_working2003/designpatterns_java.html},
nopdf = {http://michael.hahsler.net/research/patterns_working2003/designpatterns_java.pdf},
url = {http://epub.wu.ac.at/id/eprint/1646},
category = {software engineering}
}
@article{hahsler:Hahsler2003b,
  author = {Michael Hahsler},
  title = {Integrating Digital Document Acquisition into a University Library:
A Case Study of Social and Organizational Challenges},
  journal = {Journal of Digital Information Management},
  volume = {1},
  number = {4},
  pages = {162--171},
  month = dec,
  year = {2003},
  issn = {0972-7272},
  abstract = {In this article we report on the effort of the university library
of the Vienna University of Economics and Business Administration
to integrate a digital library component for research documents authored
at the university into the existing library infrastructure. Setting
up a digital library has become a relatively easy task using the
current data base technology and the components and tools freely
available. However, to integrate such a digital library into existing
library systems and to adapt existing document acquisition work-flows
in the organization are non-trivial tasks. We use a research frame
work to identify the key players in this change process and to analyze
their incentive structures. Then we describe the light-weight integration
approach employed by our university and show how it provides incentives
to the key players and at the same time requires only minimal adaptation
of the organization in terms of changing existing work-flows. Our
experience suggests that this light-weight integration offers a cost
efficient and low risk intermediate step towards switching to exclusive
digital document acquisition.},
  pdf = {http://michael.hahsler.net/research/ePub_jdim2003/IntegratingDDAcquisition_final.pdf},
  url = {http://www.dirf.org/jdim/v1i4.htm},
  category = {digital libraries}
}
@inproceedings{hahsler:GeyerSchulz2002,
author = {Walter B{\"o}hm and Andreas Geyer-Schulz and Michael Hahsler and
Maximillian Jahn},
title = {Repeat Buying Theory and its Application for Recommender Services},
booktitle = {Exploratory Data Analysis in Empirical Research, Proceedings of
the 25th Annual Conference of the Gesellschaft f{\"u}r Klassifikation
e.V., University of Munich, March 14--16, 2001},
year = {2002},
editor = {O. Opitz and M. Schwaiger},
pages = {229--239},
publisher = {Springer-Verlag},
abstract = {In the context of a virtual university's information broker we study
the consumption patterns for information goods and we investigate
if Ehrenberg's repeat-buying theory which successfully models regularities
in a large number of consumer product markets can be applied in electronic
markets for information goods too. First results indicate that Ehrenberg's
repeat-buying theory succeeds in describing the consumption patterns
of bundles of complementary information goods reasonably well and
that this can be exploited for automatically generating anonymous
recommendation services based on such information bundles. An experimental
anonymous recommender service has been implemented and is currently
evaluated in the Virtual University of the Vienna University of Economics
and Business Administration at http://vu.wu-wien.ac.at.},
pdf = {http://michael.hahsler.net/research/recomm_gfkl2001/gfkl2001.pdf},
url = {http://www.springer.com/east/home/business/business+information+systems?SGWID=5-170-69-173622621-0},
category = {recommender systems}
}
@article{hahsler:GeyerSchulz2002a,
author = {Wolfgang Gaul and Andreas Geyer-Schulz and Michael Hahsler and Lars
Schmidt-Thieme},
title = {{eMarketing mittels Recommendersystemen}},
journal = {{Marketing ZFP}},
year = {2002},
volume = {24},
pages = {47--55},
abstract = {Recommendersysteme liefern einen wichtigen Beitrag f{\"u}r die Ausgestaltung
von eMarketing Aktivit{\"a}ten. Ausgehend von einer Diskussion von
Input/Output Charakteristika zur Beschreibung solcher Systeme, die
bereits eine geeignete Unterscheidung praxisrelevanter Erscheinungsformen
erlauben, wird motiviert, warum eine solche Charakterisierung durch
die Einbeziehung methodischer Aspekte aus der Marketing Forschung
angereichert werden muss. Ein auf der Theorie des Wiederkaufverhaltens
basierendes Recommendersystem sowie ein System, das Empfehlungen
mittels Analyse des Navigationsverhaltens von Site Besuchern erzeugt,
werden vorgestellt. Am Beispiel der Amazon Site werden die Marketing
M{\"o}glichkeiten von Recommendersystemen verdeutlicht. Abschlie{\ss}end
wird zur Abrundung auf weitere Literatur mit Recommendersystem Bezug
eingegangen. In einem Ausblick werden Hinweise gegeben, in welche
Richtungen Weiterentwicklungen geplant sind.},
series = {Spezialausgabe ``E-Marketing''},
url = {http://vahlen.becksche.de/zeitschriften/},
category = {recommender systems, marketing}
}
@inproceedings{hahsler:GeyerSchulz2002d,
author = {Andreas Geyer-Schulz and Michael Hahsler},
title = {Evaluation of Recommender Algorithms for an Internet Information
Broker based on Simple Association Rules and on the Repeat-Buying
Theory},
booktitle = {Fourth WEBKDD Workshop: Web Mining for Usage Patterns \& User Profiles},
year = {2002},
editor = {Brij Masand and Myra Spiliopoulou and Jaideep Srivastava and Osmar
R. Zaiane},
pages = {100--114},
address = {Edmonton, Canada},
month = jul,
abstract = {Association rules are a widely used technique to generate recommendations
in commercial and research recommender systems. Since more and more
Web sites, especially of retailers, offer automatic recommender services
using Web usage mining, evaluation of recommender algorithms becomes
increasingly important. In this paper we first present a framework
for the evaluation of different aspects of recommender systems based
on the process of discovering knowledge in databases of Fayyad et
al. and then we focus on the comparison of the performance of two
recommender algorithms based on frequent itemsets. The first recommender
algorithm uses association rules, and the other recommender algorithm
is based on the repeat-buying theory known from marketing research.
For the evaluation we concentrated on how well the patterns extracted
from usage data match the concept of useful recommendations of users.
We use 6 months of usage data from an educational Internet information
broker and compare useful recommendations identified by users from
the target group of the broker with the results of the recommender
algorithms. The results of the evaluation presented in this paper
suggest that frequent itemsets from purchase histories match the
concept of useful recommendations expressed by users with satisfactory
accuracy (higher than 70\%) and precision (between 60\% and 90\%).
Also the evaluation suggests that both algorithms studied in the
paper perform similar on real-world data if they are tuned properly.},
pdf = {http://michael.hahsler.net/research/recomm_webkdd2002/final/webkdd2002.pdf},
category = {recommender systems, association rules}
}
@inproceedings{hahsler:GeyerSchulz2002c,
  author = {Andreas Geyer-Schulz and Michael Hahsler},
  title = {Software Reuse with Analysis Patterns},
  booktitle = {Proceedings of the 8th AMCIS},
  pages = {1156--1165},
  publisher = {Association for Information Systems},
  address = {Dallas, TX},
  month = aug,
  year = {2002},
  abstract = {The purpose of this article is to promote reuse of domain knowledge
by introducing patterns already in the analysis phase of the software
life-cycle. We propose an outline template for analysis patterns
that strongly supports the whole analysis process from the requirements
analysis to the analysis model and further on to its transformation
into a flexible and reusable design and implementation. As an example
we develop a family of analysis patterns in this paper that deal
with a series of pressing problems in cooperative work, collaborative
information filtering and sharing, and knowledge management. We evaluate
the reuse potential of these patterns by analyzing several components
of an information system, that was developed for the Virtual University
project of the Vienna University of Economics and Business Administration.
The findings of this analysis suggest that using patterns in the
analysis phase has the potential to reducing development time significantly
by introducing reuse already at the analysis stage and by improving
the interface between analysis and design phase.},
  pdf = {http://michael.hahsler.net/research/virlib_AMCIS2002/virlib_amcis2002.pdf},
  url = {http://aisel.isworld.org/article_by_author.asp?Author_ID=86},
  category = {software engineering}
}
@inproceedings{hahsler:GeyerSchulz2001,
  author = {Andreas Geyer-Schulz and Michael Hahsler and Maximillian Jahn},
  title = {Recommendations for Virtual Universities from Observed User Behavior},
  booktitle = {Classification, Automation, and New Media, Proceedings of the 24th
Annual Conference of the Gesellschaft f{\"u}r Klassifikation e.V.,
University of Passau, March 15--17, 2000 },
  editor = {W. Gaul and G. Ritter},
  pages = {273--280},
  publisher = {Springer-Verlag},
  year = {2002},
  abstract = { Recently recommender systems started to gain ground in commercial
Web-applications. For example, the online-bookseller {\em amazon.com}
recommends his customers books similar to the ones they bought using
the analysis of observed purchase behavior of consumers. In this
article we describe a generic architecture for recommender services
for information markets which has been implemented in the setting
of the Virtual University of the Vienna University of Economics and
Business Administration (http://vu.wu-wien.ac.at). The architecture
of a recommender service is defined as an agency of interacting software
agents. It consists of three layers, namely the meta-data management
system, the broker management system and the business-to-customer
interface.},
  pdf = {http://michael.hahsler.net/research/recomm_gfkl2000/paper.pdf},
  url = {http://www.springer.com/east/home/business/business+information+systems?SGWID=5-170-69-173622621-0},
  category = {recommender systems}
}
@incollection{hahsler:GeyerSchulz2002b,
author = {Andreas Geyer-Schulz and Michael Hahsler and Maximillian Jahn},
title = {A Customer Purchase Incidence Model Applied to Recommender Systems},
booktitle = {WEBKDD 2001 -- Mining Log Data Across All Customer Touch Points, Third
International Workshop, San Francisco, CA, USA, August 26, 2001,
Revised Papers},
publisher = {Springer-Verlag},
year = {2002},
editor = {R. Kohavi and B. M. Masand and M. Spiliopoulou and J. Srivastava},
series = {Lecture Notes in Computer Science LNAI 2356},
pages = {25--47},
month = jul,
abstract = {In this contribution we transfer a customer purchase incidence model
for consumer products which is based on Ehrenberg's repeat-buying
theory to Web-based information products. Ehrenberg's repeat-buying
theory successfully describes regularities on a large number of consumer
product markets. We show that these regularities exist in electronic
markets for information goods, too, and that purchase incidence models
provide a well founded theoretical base for recommender and alert
services. The article consists of two parts. In the first part Ehrenberg's
repeat-buying theory and its assumptions are reviewed and adapted
for web-based information markets. Second, we present the empirical
validation of the model based on data collected from the information
market of the Virtual University of the Vienna University of Economics
and Business Administration from September 1999 to May 2001.},
note = {(Revised version of the WEBKDD 2001 paper ``A Customer Purchase
Incidence Model Applied to Recommender Systems'')},
pdf = {http://michael.hahsler.net/research/recomm_lncs2001/lncswebkdd2001a/lncswebkdd2001a.pdf},
url = {http://www.springerlink.com/content/mb2rqan13gy9/},
category = {recommender systems}
}
@techreport{hahsler:GeyerSchulz2001d,
  author = {Andreas Geyer-Schulz and Michael Hahsler},
  title = {Software Engineering with Analysis Patterns},
  type = {Working Paper},
  number = {01/2001},
  institution = {Working Papers on Information Processing and Information Management,
Institut f{\"u}r Informationsverarbeitung und -wirtschaft, Wirtschaftsuniversit{\"a}t
Wien},
  address = {Augasse 2--6, 1090 Wien, Austria},
  month = nov,
  year = {2001},
  abstract = { The purpose of this article is twofold, first to promote the use
of patterns in the analysis phase of the software life-cycle by proposing
an outline template for analysis patterns that strongly supports
the whole analysis process from the requirements analysis to the
analysis model and further on to its transformation into a flexible
design. Second we present, as an example, a family of analysis patterns
that deal with a series of pressing problems in cooperative work,
collaborative information filtering and sharing, and knowledge management.
We present the step-by-step evolution of the analysis pattern virtual
library with active agents starting with a simple pinboard. },
  html = {http://michael.hahsler.net/research/virlib_working2001/virlib/virlib.html},
  nopdf = {http://michael.hahsler.net/research/virlib_working2001/virlib.pdf},
  url = {http://epub.wu.ac.at/id/eprint/592},
  category = {software engineering}
}
@inproceedings{hahsler:GeyerSchulz2001e,
author = {Andreas Geyer-Schulz and Michael Hahsler and Maximillian Jahn},
title = {{Wissenschaftliche Recommendersysteme in Virtuellen Universit{\"a}ten}},
booktitle = {Unternehmen Hochschule},
year = {2001},
editor = {H.-J. Appelrath and R. Beyer and U. Marquardt and H. C. Mayr and C.
Steinberger},
address = {Wien, {\"O}sterreich},
month = sep,
note = {Symposium UH2001, GI Lecture Notes in Informatics (LNI)},
abstract = { In diesem Beitrag wird die Rolle von Recommendersystemen und ihr
Potential in der Lehr-, Lern- und Forschungsumgebung einer Virtuellen
Universit{\"a}t untersucht. Die Hauptidee dieses Beitrags besteht
darin, die Informationsaggregationsf{\"a}higkeiten von Recommendersystemen
in einer Virtuellen Universit{\"a}t auszunutzen, um Tutoren- und Beratungsdienste
in einer Virtuellen Universit{\"a}t automatisch zu verbessern, um
damit Betreuung und Beratung von Studierenden zu personalisieren
und f{\"u}r eine gr{\"o}{\ss}ere Anzahl von Teilnehmern bei gleichzeitiger
Entlastung der Lehrenden verf{\"u}gbar zu machen. Im zweiten Teil
dieses Beitrags werden die Recommenderdienste von myVU, der Sammlung
der personalisierten Dienste der Virtuellen Universit{\"a}t (VU)
der Wirtschaftsuniversit{\"a}t Wien und ihre nicht-personalisierten
Varianten beschrieben, die im Wesentlichen auf beobachtetem Benutzerverhalten
und, in der personalisierten Variante, zus{\"a}tzlich auf Selbstselektion
durch Selbsteinsch{\"a}tzung der Erfahrung in einem Fachgebiet beruhen.
Abschlie{\ss}end wird noch der innovative Einsatz solcher Systeme diskutiert
und an einigen Szenarien beschrieben. },
pdf = {http://michael.hahsler.net/research/unternehmenhochschule2001/uh2001.pdf},
url = {http://www.gi-ev.de/},
category = {recommender systems}
}
@article{hahsler:GeyerSchulz2001b,
  author = {Andreas Geyer-Schulz and Michael Hahsler and Maximillian Jahn},
  title = {Educational and Scientific Recommender Systems: Designing the Information
Channels of the Virtual University},
  journal = {International Journal of Engineering Education},
  series = {Special Issue on Virtual Universities},
  volume = {17},
  number = {2},
  pages = {153--163},
  year = {2001},
  issn = {0949-149X},
  abstract = {In this article we investigate the role of recommender systems and
their potential in the educational and scientific environment of
a Virtual University. The key idea is to use the information aggregation
capabilities of a recommender system to improve the tutoring and
consulting services of a Virtual University in an automated way and
thus scale tutoring and consulting in a personalized way to a mass
audience. We describe the recommender services of myVU, the collection
of the personalized services of the Virtual University (VU) of the
Vienna University of Economics and Business Administration which
are based on observed user behavior and self assignment of experience
which are currently field-tested. We show, how the usual mechanism
design problems inherent to recommender systems are addressed in
this prototype.},
  pdf = {http://michael.hahsler.net/research/recomm_ijee2001/paper.pdf},
  url = {http://www.ijee.dit.ie/contents/c170201.html},
  category = {recommender systems}
}
@inproceedings{hahsler:GeyerSchulz2001c,
  author = {Andreas Geyer-Schulz and Michael Hahsler and Maximillian Jahn},
  title = {A Customer Purchase Incidence Model Applied to Recommender Systems},
  booktitle = {WEBKDD2001 Workshop: Mining Log Data Across All Customer TouchPoints},
  pages = {35--45},
  address = {San Francisco, CA},
  month = aug,
  year = {2001},
  abstract = {In this contribution we transfer a customer purchase incidence model
for consumer products which is based on Ehrenberg's repeat-buying
theory to Web-based information products. Ehrenberg's repeat-buying
theory successfully describes regularities in a large number of consumer
product markets. We show that these regularities exist in electronic
markets for information goods too, and that purchase incidence models
provide a well founded theoretical foundation for recommender and
alert systems. The article consists of three parts. First, we present
the architecture of an information market and its instrumentation
for collecting data on customer behavior. In the second part Ehrenberg's
repeat-buying theory and its assumptions are reviewed and adapted
for Web-based information markets. Finally, we present the empirical
validation of the model based on data collected from the information
market of the Virtual University of the Vienna University of Economics
and Business Administration at http://vu.wu-wien.ac.at },
  pdf = {http://michael.hahsler.net/research/recomm_webKDD2001/paper/geyerschulz.pdf},
  category = {recommender systems}
}
@phdthesis{hahsler:Hahsler2001,
author = {Michael Hahsler},
title = {{Analyse Patterns im Softwareentwicklungsproze{\ss} mit Beispielen f{\"u}r
Informationsmanagement und deren Anwendungen f{\"u}r die Virtuellen
Universit{\"a}t der Wirtschaftsuniversit{\"a}t Wien}},
school = {Wirtschaftsuniversit{\"a}t Wien},
year = {2001},
type = {Dissertation},
address = {Augasse 2--6, A 1090 Wien, {\"O}sterreich},
month = jan,
abstract = {Diese Arbeit besch{\"a}ftigt sich mit Analyse Patterns, der Anwendung
von Patterns in der Analysephase der Softwareentwicklung. In der
Designphase werden Patterns seit einigen Jahren eingesetzt, um Expertenwissen
und Wiederverwendbarkeit in den Designproze{\ss} einflie{\ss}en zu lassen.
Es existiert bereits eine F{\"u}lle an solchen Design Patterns. Die
Analysephase ist ein neuer Anwendungsbereich f{\"u}r Patterns, der
bisher in der Literatur noch nicht ausreichend behandelt wurde. In
dieser Arbeit wird die Anwendung des Pattern-Ansatzes in der Analysephase
aufgearbeitet und konkretisiert. Analyse Patterns unterst{\"u}tzen
den gesamten Softwareentwicklungsproze{\ss} und helfen bekannte Probleme
w{\"a}hrend der Analysephase zu l{\"o}sen. Dadurch k{\"o}nnen Zeit
und Kosten bei der Entwicklung neuer Softwaresysteme eingespart werden.
Diese Eigenschaften von Analyse Patterns werden anhand konkreter
Beispiele in einer Case Study nachgewiesen. Diese Case Study beschreibt
den Einsatz von in dieser Arbeit entwickelten Analyse Pattern f{\"u}r
Informationsmanagement anhand des Projekts Virtuelle Universit{\"a}t
der Wirtschaftsuniversit{\"a}t Wien, in dem ein Internet-Informationsbroker
zur Unterst{\"u}tzung von Lehre und Forschung realisiert wird. Die
Erfahrungen aus diesem Projekt werden untersucht, und die Auswirkungen
der Analyse Patterns auf Wiederverwendung bei der Softwareentwicklung
und auf die Akzeptanz des resultierenden Systems werden pr{\"a}sentiert.},
pdf = {http://michael.hahsler.net/research/diss/diss.pdf},
url = {http://epub.wu.ac.at/id/eprint/1866},
category = {software engineering}
}
% Proceedings paper on automatically labelling reference lists. The booktitle
% carries the proceedings title plus the conference place and dates (1999);
% the volume itself is dated 2000.
@inproceedings{hahsler:GeyerSchulz2000,
  author    = {Andreas Geyer-Schulz and Michael Hahsler},
  title     = {Automatic Labelling of References for Information Systems},
  booktitle = {Classification and Information Processing at the Turn of the Millennium,
               Proceedings of the 23rd Annual Conference of the Gesellschaft f{\"u}r
               Klassifikation e.V., University of Bielefeld, March 10--12, 1999},
  editor    = {Reinhold Decker and Wolfgang Gaul},
  series    = {Studies in Classification, Data Analysis, and Knowledge Organization},
  publisher = {Springer-Verlag},
  year      = {2000},
  pages     = {451--459},
  abstract  = {Today users of Internet information services like e.g. Yahoo! or AltaVista
               often experience high search costs. One important reason for this
               is the necessity to browse long reference lists manually, because
               of the well-known problems of relevance ranking. A possible remedy
               is to complement the references with automatically generated labels
               which provide valuable information about the referenced information
               source. Presenting suitably labelled lists of references to users
               aims at improving the clarity and thus comprehensibility of the information
               offered and at reducing the search cost. In the following we survey
               several dimensions for labelling (time, frequency of usage, region,
               language, subject, industry, and preferences) and the corresponding
               classification problems. To solve these problems automatically we
               sketch for each problem a pragmatic mix of machine learning methods
               and report selected results.},
  url       = {http://www.springer.com/east/home/business/business+information+systems?SGWID=5-170-69-173622621-0},
  pdf       = {http://michael.hahsler.net/research/labeling_gfkl1999/paper/labelling.pdf},
  category  = {recommender systems}
}
% Contribution with a German title in the collection named in booktitle.
% Only the German nouns are brace-protected: sentence-casing styles would
% otherwise lowercase them, whereas wrapping the whole title in a second pair
% of braces would switch style-driven casing off altogether.
@inproceedings{hahsler:GeyerSchulz2000c,
  author    = {Andreas Geyer-Schulz and Michael Hahsler},
  title     = {Lebenslanges virtuelles {Lernen}},
  booktitle = {Europas {Arbeitswelt} von {Morgen}},
  editor    = {Franciszek Grucza},
  publisher = {Wiener Zentrum der Polnischen Akademie der Wissenschaften},
  address   = {Wien},
  year      = {2000},
  pages     = {51--54},
  url       = {http://www.viennapan.org/06de01.htm}
}
% Book chapter describing the myVU recommender system.
% The system name is brace-protected in the title because sentence-casing
% styles would otherwise render it as "myvu". The publisher spelling follows
% the "Springer-Verlag" form used by hahsler:GeyerSchulz2000 in this file.
@incollection{hahsler:GeyerSchulz2000a,
  author    = {Andreas Geyer-Schulz and Michael Hahsler and Maximillian Jahn},
  title     = {{myVU}: A Next Generation Recommender System Based on Observed Consumer
               Behavior and Interactive Evolutionary Algorithms},
  booktitle = {Data Analysis: Scientific Modeling and Practical Applications},
  editor    = {Wolfgang Gaul and Otto Opitz and Martin Schader},
  series    = {Studies in Classification, Data Analysis, and Knowledge Organization},
  publisher = {Springer-Verlag},
  address   = {Heidelberg, Germany},
  year      = {2000},
  pages     = {447--457},
  abstract  = {myVU is a next generation recommender system based on observed consumer
               behavior and interactive evolutionary algorithms implementing customer
               relationship management and one-to-one marketing in the educational
               and scientific broker system of a virtual university. myVU provides
               a personalized, adaptive WWW-based user interface for all members
               of a virtual university and it delivers routine recommendations for
               frequently used scientific and educational Web-sites.},
  url       = {http://www.springer.com/east/home/business/business+information+systems?SGWID=5-170-69-173622621-0},
  pdf       = {http://michael.hahsler.net/research/festschrift2000/paper.pdf},
  category  = {recommender systems}
}
% Conference paper (AMCIS 2000) on user-centered re-design of the navigation
% structure of web-based information systems.
@inproceedings{hahsler:Hahsler2000,
  author    = {Michael Hahsler and Bernd Simon},
  title     = {User-centered Navigation Re-Design for Web-based Information Systems},
  booktitle = {Proceedings of the Sixth Americas Conference on Information Systems
               (AMCIS 2000)},
  editor    = {H. Michael Chung},
  publisher = {Association for Information Systems},
  address   = {Long Beach, CA},
  year      = {2000},
  pages     = {192--198},
  abstract  = {Navigation design for web-based information systems (e.g. e-commerce
               sites, intranet solutions) that ignores user-participation reduces
               the system's value and can even lead to system failure. In this paper
               we introduce a user-centered, explorative approach to re-designing
               navigation structures of web-based information systems, and describe
               how it can be implemented in order to provide flexibility and reduce
               maintenance costs. We conclude with lessons learned from the navigation
               re-design project at the Vienna University of Economics and Business
               Administration.},
  url       = {http://aisel.isworld.org/article_by_author.asp?Author_ID=86},
  pdf       = {http://michael.hahsler.net/research/webdesign_amcis2000/TT04-11_final.pdf},
  category  = {marketing}
}
% Workshop contribution (Informatik '99 / Unternehmen Hochschule '99).
% No abstract, pdf, url or category is recorded for this entry.
@inproceedings{hahsler:GeyerSchulz1999b,
  author    = {Andreas Geyer-Schulz and Michael Hahsler and Georg Schneider},
  title     = {The Virtual University as a Network Economy},
  booktitle = {Informatik '99, Unternehmen Hochschule '99, Workshop-Unterlagen},
  editor    = {Heinrich C. Mayr and Claudia Steinberger and Hans-J{\"u}rgen Appelrath
               and Uwe Marquardt},
  address   = {Bielefeld, Germany},
  year      = {1999},
  month     = oct,
  pages     = {75--86}
}
% Journal article on Internet agents embedded in the Virtual University.
% The journal name keeps a single brace group for the umlaut only; the former
% per-letter groups and the empty group did nothing in a journal field and
% suppress kerning between the letters of the acronym.
@article{hahsler:GeyerSchulz1999,
  author   = {Andreas Geyer-Schulz and Michael Hahsler and Georg Schneider},
  title    = {The Virtual University and Its Embedded Agents},
  journal  = {{\"O}GAI Journal},
  year     = {1999},
  volume   = {18},
  number   = {1},
  pages    = {14--19},
  issn     = {0254-4326},
  abstract = {In this article we present the current state of usage of (intelligent)
              Internet agents in the Virtual University (VU) of the Vienna University
              of Economics and BA. We discuss opportunities and challenges for
              the development of several classes of agents and their sensor systems.
              More specifically, agents of the following classes embedded in the
              virtual university system will be presented: (1) robots which support
              navigation services and (2) robots which support communication and
              collaboration.}
}
% Journal article surveying the department's machine-learning and Internet
% agent research projects.
% The quotation in the abstract opens with two backticks, since two
% apostrophes are typeset by LaTeX as closing quotes. The journal name keeps a
% brace group for the umlaut only; per-letter braces did nothing here.
@article{hahsler:GeyerSchulz1998,
  author   = {Peter Bruhn and Andreas Geyer-Schulz and Michael Hahsler and Markus
              Mottel},
  title    = {Genetic Machine Learning and Intelligent Internet Agents},
  journal  = {{\"O}GAI Journal},
  year     = {1998},
  volume   = {17},
  number   = {1},
  pages    = {18--25},
  issn     = {0254-4326},
  abstract = {In this paper we report on the status quo of the current machine
              learning research projects at the Department of Applied Computer
              Science of the Institute of Information Processing and Information
              Economics of the Vienna University of Economics and Business Administration.
              The current research activities can be categorized as follows: (1)
              Development of a theoretic framework of genetic programming. (2)
              Application of genetic programming for managerial and economic decision-making
              and for breeding agents' strategies in organizational learning. (3)
              Development, adaptation, and integration of (intelligent) Internet
              agents for support of the virtual organizations. (4) Development
              of an infrastructure for intelligent Internet agents in the ``Living
              Lectures - Virtual University'' project. (5) Cost-benefit analysis
              of agents, analysis of tactical and strategic consequences of agents
              and the analysis of their economic applications.}
}
% Diploma thesis (type = Diplomarbeit) on the pinboard software pattern.
% Only the noun "Patterns" is brace-protected in the title: the first word and
% the word after the colon keep their capital under sentence casing anyway.
% Without the former outer pair of braces the umlaut group sits at top level
% again, where BibTeX treats it as a single accented letter.
@mastersthesis{hahsler:Hahsler1997,
  author   = {Michael Hahsler},
  title    = {Software {Patterns}: Pinw{\"a}nde},
  school   = {Wirtschaftsuniversit{\"a}t Wien},
  type     = {Diplomarbeit},
  address  = {Augasse 2--6, A 1090 Wien, {\"O}sterreich},
  year     = {1997},
  month    = nov,
  abstract = {Diese Arbeit besch{\"a}ftigt sich mit dem Pattern-Ansatz f{\"u}r die
              Architektur von Software. Nach einer kurzen Darstellung des Ansatzes
              werden das Pinwand-Pattern und seine Varianten beschrieben. Pinw{\"a}nde
              werden verwendet, um Informationen zu sammeln und Interessierten
              zur Verf{\"u}gung zu stellen. Sie finden unter anderem in den folgenden
              Bereichen Anwendung: Groupware-Anwendungen, Conferencing Systeme,
              Diskussionsforen und Virtuelle Bibliotheken.},
  pdf      = {http://michael.hahsler.net/research/diplomarbeit/dipl/pinwand_patterns.pdf},
  category = {software engineering}
}
This file was generated by bibtex2html 1.96.