#!/usr/bin/perl -w
package Cluster;
require Exporter;

use strict;
use lib "../stat";
use lib "../../stat";
use Vector;

our (@ISA, @EXPORT);
@ISA = qw(Exporter);
@EXPORT = qw(clusterSingly readCluster);

{

    # accepts a reference to a list of hashable objects and reference to a hashtable
    # stating which pairs are similar - true or false. (needn't be symmetric, and false
    # entries may be omitted).
    # Returns a hash of cluster ids -> list of objects and a hash of object -> cluster id
    sub clusterSingly($$) {
	my ($listRef, $hashRef) = @_;
	my $nObj = scalar @{ $listRef };
	my $maxObj = $nObj-1;
	my $nClusters = $nObj;
	my %objCluster = map { $listRef->[$_] => $_ } (0..$maxObj);

	my @listClusters = map { [ $listRef->[$_] ] } (0..$maxObj);

	foreach my $obj1 (@{ $listRef }) {
	    if (exists $hashRef->{$obj1}) {
		foreach my $obj2 (keys %{ $hashRef->{$obj1}}) {
		    die join(" ", "Illegal value", $obj1, $obj2, $hashRef->{$obj1}{$obj2}),"\n"
			if !exists $objCluster{$obj2};
		    if ($hashRef->{$obj1}{$obj2} && $objCluster{$obj1} != $objCluster{$obj2}) {
			# put them in the same cluster, using the lower index
			my $nNew = Vector::min($objCluster{$obj1}, $objCluster{$obj2});
			my $nOld = Vector::max($objCluster{$obj1}, $objCluster{$obj2});
			
			foreach my $obj (@{ $listClusters[$nOld] }) {
			    push @{ $listClusters[$nNew] }, $obj;
			    $objCluster{$obj} = $nNew;
			}
			$listClusters[$nOld] = [];
		    }
		}
	    }
	}

	my %mapClusters = (); # cluster -> list of obj
	foreach my $obj (@{ $listRef }) {
	    GenomicsUtils::addToHashList( \%mapClusters, $objCluster{$obj}, $obj );
	};
	return( \%mapClusters, \%objCluster );
    }

    # given the file, returns 2 hashes: cluster id->list of strings and strings->cluster id
    sub readCluster( $ ) {
	my %objToCluster = ();
	my %clusterToObjs = ();
	my $clusterFile = shift;
	open(CLUSTERFILE, '<', $clusterFile) || die "Cannot read from $clusterFile\n";
	while(<CLUSTERFILE>) {
	    s/[\r\n]+$//;
	    my @values = split /\t/;
	    next if $values[0] eq "Cluster";
	    my $cluster = $values[0];
	    my $obj = $values[2];
	    die "Dup cluster assignment for $obj" if exists $objToCluster{$obj};
	    $objToCluster{$obj} = $cluster;
	    $clusterToObjs{$cluster} = [] if !exists $clusterToObjs{$cluster};
	    push @{ $clusterToObjs{$cluster} }, $obj;
	}
	close(CLUSTERFILE) || die "Cannot close $clusterFile";
	return (\%clusterToObjs, \%objToCluster);
    }
}
	
return 1;

