#
#### This script creates operon predictions in the directory specified. It also creates "window" gene
#### neighbor scores for use in regulon prediction. Use LoadOperons.pl to put
#### the operon predictions into the database.
####
#### This script MUST be run in Genomics/seq, and it does not have much error handling, so
#### you should check the output (which is voluminous, both to STDOUT and to STDERR) for
#### Failed or failed to make sure that everything ran OK.
out=$1
if [[ $1 == "" ]]; then
echo Must specify an output directory
exit 2
fi

if [[ $2 == "" || $2 == "GNC" ]]; then
echo all neighor scores
./GeneNeighborsConservedAll.pl -out=$out/gnc
echo make gncAllGenomes
head -1 $out/gnc83333.genomes > $out/gncAllGenomes
perl -F\\t -ane 'print $_ if $F[0] ne "TaxId1"' $out/*[0-9].genomes >> $out/gncAllGenomes
perl -F\\t -ane 'print "$F[0]\n" if $F[0] ne "TaxId1"' < $out/gncAllGenomes | sort -u > $out/genomesList
fi
if [[ $2 == "" || $2 == "Cluster" ]]; then
echo cluster both
./ClusterGenomes.pl -maxConvBoth=50 -in=$out/gncAllGenomes > $out/clusterBoth50
echo cluster adj
./ClusterGenomes.pl -maxConvAdj=5 -in=$out/gncAllGenomes > $out/clusterConvAdj5
echo build gncAllGenomes\* descriptive files
../util/join.pl -match 1.TaxId1=2.TaxId -ignore 2.TaxId,2.Genome -rename 2.Cluster=CB1,2.Size=nCB1 $out/gncAllGenomes $out/clusterBoth50 | ../util/join.pl -match 1.TaxId2=2.TaxId -ignore 2.TaxId,2.Genome -rename 2.Cluster=CB2,2.Size=nCB2 - $out/clusterBoth50 > $out/gncAllGenomesB
../util/join.pl -match 1.TaxId1=2.TaxId -ignore 2.TaxId,2.Genome -rename 2.Cluster=CN1,2.Size=nCN1 $out/gncAllGenomesB $out/clusterConvAdj5 | ../util/join.pl -match 1.TaxId2=2.TaxId -ignore 2.TaxId,2.Genome -rename 2.Cluster=CN2,2.Size=nCN2 - $out/clusterConvAdj5 > $out/gncAllGenomesBN
fi
if [[ $2 == "" || $2 == "Phylo" ]]; then
echo all phylo scores
./GenePhyloScoresAll.pl -gnc $out/gnc -out $out/gnc -cluster $out/clusterBoth50
fi
if [[ $2 == "" || $2 == "GNScore" ]]; then
echo all neighbor scores
./GeneNeighborScoresAll.pl -gnc $out/gnc -out $out/gnc -cluster $out/clusterConvAdj5
fi
if [[ $2 == "" || $2 == "Bias" ]]; then
echo all codon bias
perl -F\\t -ane "chomp \$F[0]; print \"CAI for \$F[0]...\n\"; system(\"../util/codonBias.pl -taxId \$F[0] -ref cog > $out/cai\$F[0]\");" < $out/genomesList
echo all .cai file
perl -ane "chomp \$F[0];system(\"../util/join.pl -only 10 -match 1.Gene1=2.\#gi -ignore 2.\#gi $out/gnc\$F[0].scores $out/cai\$F[0] | ../util/join.pl -match 1.Gene2=2.\#gi -only 10 -ignore 2.\#gi -rename 2.CAI=nCAI,2.CBI=nCBI,2.CAI_Alm=nCAI_Alm,2.COG=nCOG,2.Annotation=nAnnotation,2.COGanno=nCOGanno,2.COGfun=nCOGfun - $out/cai\$F[0] > $out/gnc\$F[0].cai\")==0 && print STDERR \"ran \$F[0]\\n\";" < $out/genomesList
fi
if [[ $2 == "" || $2 == "Window" ]]; then
echo all window scores
./GeneNeighborScoresAll.pl -gnc $out/gnc -geneWindow 10 -out $out/window -cluster $out/clusterConvAdj5 
fi
if [[ $2 == "" || $2 == "Predict" ]]; then
echo all predictions
R --vanilla <<END
source("../util/utils.R")
opAll <- OperonsPredictAll(plot=FALSE,save=TRUE,gncDir="$out"); # automatically saved
cat("Predict all done\n");
opAllNaive <- OperonsPredictAll(plot=FALSE,save=FALSE,usefOperonNaive=TRUE,gncDir="$out");
writeDelim(opAllNaive,"$out/gncAllGenomes.statsNaivefOperon");
cat("Predict naive done\n");
opAllMI <- OperonsPredictAll(formula=same~bbfGNMinus+bbfGNScore+bbfGNWithin+cfCOG+bbfMI,save=FALSE,plot=FALSE,gncDir="$out");
writeDelim(opAllMI,"$out/gncAllGenomes.withMI");
cat("Predict MI done\n");
END
fi
