#!/afs/wsi/@sys/bin/perl
##############################################################
# #
# RCL : Radius-based Competitive Learning #
# #
# ------------------------------------------------------- #
# #
# Author: Udo Heuser (heuser@informatik.uni-tuebingen.de) #
# #
# ------------------------------------------------------- #
# #
# Refs.: #
# 1. [HeHe95] R. Henrion, G. Henrion: Multivariate #
# Datenanalyse, Methodik und Anwendung in der #
# Chemie, Springer-Verlag, 1995 (in German) #
# #
# 2. [ScEr97] E. Schikuta, M. Erhart: The BANG-Clus- #
# tering System: Grid-Based Data Analysis, in #
# X. Liu, P. Cohen, M. Berthold (Eds.): Advances #
# in Intelligent Data Analysis (IDA-97), LNCS 1280 #
# pp. 513-524, 1997 #
# 3. [Sedg92] R. Sedgewick, Algorithms in C++, #
# Addison-Wesley Publishing Company, 1992 #
# #
##############################################################
# RCL default documents dir; file names given with -i, -o and -oeq are
# taken relative to it.
$rcl_home = "/home/heuser/OASIS/RCL/Docs";
#
# usage / version
#
# Only the first argument is inspected.  The flag is compared as a whole
# string rather than applied as an unanchored pattern, so a first argument
# that merely contains "-h" or "-v" is not mistaken for a help or version
# request.
{
    my $first = defined($ARGV[0]) ? $ARGV[0] : "";
    if ( $first eq "-h" || $first eq "-help" || $first eq "--help" ) {
        &usage();
        exit;
    }
    if ( $first eq "-v" || $first eq "-version" || $first eq "--version" ) {
        print STDERR "rcl version 0\n";
        print STDERR "last modified: Thu Oct 1 11:17:29 MET DST 1998\n";
        exit;
    }
}
#
# read in args
#
# Every option has the form "-name value".  Parsing stops at the first
# argument that does not start with a dash.  An unknown option, a missing
# value, or a value that itself starts with a dash prints the usage text
# and exits with status 1.
{
    # option name => [ reference to the global it sets, prefix for the value ]
    # The ">" in the -o/-oeq prefixes is the write mode consumed by the
    # later two-argument open of these files.
    my %setting = (
        "-i"     => [ \$i,           "$rcl_home/"  ],
        "-o"     => [ \$o,           ">$rcl_home/" ],
        "-oeq"   => [ \$o_eqe,       ">$rcl_home/" ],
        "-rad"   => [ \$radius,      ""            ],
        "-non"   => [ \$max_neurons, ""            ],
        "-ovl"   => [ \$overlap,     ""            ],
        "-sig"   => [ \$sigma,       ""            ],
        "-log"   => [ \$log,         ""            ],
        "-debug" => [ \$debug,       ""            ],
    );
    while ( defined($ARGV[0]) && $ARGV[0] =~ /^-/ ) {
        my $opt  = shift(@ARGV);
        my $val  = shift(@ARGV);
        my $slot = $setting{$opt};
        if ( !defined($slot) ) {
            # do not silently drop a mistyped option together with its value
            print STDERR "$0: Unknown option $opt.\n";
            &usage();
            exit(1);
        }
        if ( !defined($val) || $val =~ /^-/ ) {
            # value missing, or the next option follows immediately
            print STDERR "$0: Error in reading arguments.\n";
            &usage();
            exit(1);
        }
        ${ $slot->[0] } = $slot->[1] . $val;
    }
}
#
# global args initialisation
#
# Anything the command line left undefined receives its default here.
#
# RCL input file
$i = "$rcl_home/gm_out" unless defined($i);
# RCL output file (leading ">" is the open mode)
$o = ">$rcl_home/rcl_out" unless defined($o);
# RCL expected quantisation error output file
$o_eqe = ">$rcl_home/rcl_eqe" unless defined($o_eqe);
# overlap
$overlap = 0 unless defined($overlap);
# standard deviation of init neurons from mean 0:
#   0.02 would center the init neurons around the mean,
#   0.5 scatters them through the input space (the default used here)
$sigma = 0.5 unless defined($sigma);
# RCL error log file
$log = "$rcl_home/rcl.log" unless defined($log);
# RCL temp dir
$tmp_dir = "$rcl_home/tmp" unless defined($tmp_dir);
# debug mode?
$debug = 0 unless defined($debug);
# define logging file: the names STDERR/stderr duplicate the standard error
# handle, every other value is opened for writing as a file
$log = ( $log eq "STDERR" || $log eq "stderr" ) ? ">&STDERR" : ">" . $log;
unless ( defined( open(LOG, $log) ) ) {
    &terminate("$0: Can't open $log for error logging: $!\n");
}
#
# global constant defs
#
# Upper/lower sentinel values (machine dependent); the original comment
# calls them min/max integer values, the literals are the floats +/-1E308.
( $MAXINT, $MININT ) = ( 1E308, -1E308 );
# outer space boundary
$OUT = 1.5;
# Which variants of the expected quantisation error are calculated:
#   $ceqe_ext  - the exact one
#   $ceqe_rad  - the one using $radius
#   $ceqe_fcr  - the one using the F-criterion
#   $ceqe_nfcr - the one using the normalized F-criterion
( $ceqe_ext, $ceqe_rad, $ceqe_fcr, $ceqe_nfcr ) = ( 1, 0, 1, 1 );
# fine tune results?
$fine_tuning = 1;
# Tuning constants; they are only assigned here, their meaning has to be
# taken from the routines that read them.
( $const_nseq_rep_0, $const_nseq_rep_1 ) = ( 150, 0.8 );
( $const_eqe_der, $const_pruning ) = ( 30, 20 );
#
# global vars initialisations
#
# dimension of input document vectors (profiles)
$dim = 0;
# no. of input document vectors
$samples = 0;
# no. of training cycles
$lsteps = 0;
# $min_sample threshold: no. of equally distributed document vectors
# of every neuron with radius $radius
$min_sample = $MAXINT;
# boundaries of BANG cells; start from an explicitly empty list (a bare
# "@bound;" statement evaluates the array in void context and does nothing)
@bound = ();
# define overlap ranges
# Reject text that is not a plain decimal number first: Perl would numify
# input such as "abc" or "0,5" to 0 and let it pass the range test below.
if ( $overlap !~ /^\s*[+-]?(\d+\.?\d*|\.\d+)([eE][+-]?\d+)?\s*$/ ) {
    &terminate("$0: overlap '$overlap' is not a number\n");
}
if ( $overlap < 0 || $overlap >= 1 ) { &terminate("$0: overlap out of range [0;1[\n"); }
#
# main
#
# Order of work: read the input, choose the optional low-dimensional output
# files, initialise the neurons, log the start-up values, train, finish.
#
# read document vectors and dim from documents input file
&rcl_in;
# define RCL neuron traces, centroids and sample output file
# (only set up for inputs with fewer than 4 dimensions)
if ( $dim < 4 ) {
    # neuron traces output file
    if ( !defined($o_nt) ) { $o_nt = ">$rcl_home/rcl_trace"; }
    # cluster centroids output file
    if ( !defined($o_ctr) ) { $o_ctr = ">$rcl_home/rcl_ctr"; }
    # current input sample output file
    if ( !defined($o_sam) ) { $o_sam = ">$rcl_home/rcl_osam"; }
}
# Generating $max_neurons neurons with initial randomized $dim-dim weights.
# An explicit neuron count (-non) takes precedence over a radius (-rad);
# one of the two has to be given.
print LOG "Initialize RCL neurons.\n";
if ( defined($max_neurons) ) { &rcl_init; }
elsif ( defined($radius) ) {
    # start from the interval [-1;1] in every dimension
    for ( $d=0; $d<$dim; ++$d ) {
        $bound[$d][0] = -1;
        $bound[$d][1] = 1;
    }
    $n = 0;
    # @bound is handed over as a typeglob
    &rcl_init_auto(*bound);
}
else {
    &terminate("$0: Neither radius nor no. of init neurons defined!\n");
}
# print out startup values
print LOG "\tradius: $radius, overlap: $overlap\n";
print LOG "\tno. of init neurons: $max_neurons\n";
print LOG "\tno. of document vectors: $samples, dim: $dim\n";
print LOG "\tmax. no. of training cycles: $lsteps\n";
print LOG "\tmin. no. of samples for radius $radius: $min_sample\n";
# print out init neuron traces
if ( defined($o_nt) ) { &pr_ntr_init; }
# print out init expected quantisation error
if ( defined($o_eqe) ) { &pr_eqe_init; }
# RCL training cycles (skipped when $lsteps is 0)
if ( $lsteps ) { &rcl_lstep; }
# display total consumed time and close report files
# (the call had been corrupted into a literal summation sign, the decoded
# form of the HTML entity that spells "&sum;")
&sum;
# return no. of detected clusters
# return $max_neurons;
exit;
#
# subroutines
#
# print usage
sub usage {
# local($pr_name) = split(/\//, $0);
# print STDERR "\nUsage: $pr_name \{-i \} \{-o