################################################################
#!/bin/bash
# gold_analysis.sh
# bash script for calculating rmsd between docking solutions of GOLD
# Stefan Henrich
#
# 31.08.2005
# 28.11.2005	--sdfile
# 12.01.2006	remove double (empty) lines from docking solution sd files
# 20.04.2006	match residue and pdb file
# 07.06.2006	antechamber pdb->mol2
version="EML version 08.06.2006"
################################################################
# gold_anaysis.sh [-pdb1,-sdf1] [-pdb2,-sdf2]
#
# required programs:
# smartrms (GOLD)
# babel
# extract_prop_sdf (SDF toolkit)
#
#
# This script creates a table of RMSD values calculated between a
# reference structure (eg. xray structure of ligand) and docking
# solutions of gold. The residue name <residue> is got from the 
# SDF of docking solution with file name [template file name] and
# is used for reading <residue>.pdb in
# directory (-d $directory name). The PDB file of the ligand is
# converted by babel into an SD file, which is compared to all the 
# docking solutions by smartrms of GOLD.
#
# OUTPUT:
# summary_rmsd.log		table with RMSD values
# best_rmsd.sdf			SD file with with docking solutions with
#						the best RMSD values
# ranked_$allfiles_m_best_sort.sdf  
# 			one SD file with best ranked docking solutions and sort it for residue
#
# -all [template file name] (e.g. PDB_uPA_180805)
# 
# 
# 
# 
# 
# Directory for reference structure:
directory="/home/henricsn/combine2go/data/urokinase/pdb/ligand/"
##################################################################
##################################################################
# DO NOT CHANGE ANYTHING BELOW THIS LINE
# UNLESS YOU KNOW WHAT YOU ARE DOING
##################################################################
##################################################################
##################################################################
##################################################################
pdb1="";pdb2=""
until [ -z "$1" ]; do
	case $1 in
		--help|-h )
			echo options:
			echo "-h       help"
			echo "-vers    version ($version)"
			echo "-d       \$DIR directory ($directory)"
			echo "-pdb1    \$FILE pdb file of first molecule (eg. xray)"
			echo "-sdf1    \$FILE sdf file of first molecule (eg. xray)"
			echo "-pdb2    \$FILE pdb file of second molecule (eg. docking solution)"
			echo "-sdf2    \$FILE sdf file of second molecule (eg. docking solution)"
			echo "-ranked  calculating rmsd for several docking solutions"
			echo "         \$FILE _* template file name"
			echo "-all     calculating rmsd for all docking solutions in subfolders"
			echo "         \$FILE _m* template file name"
			echo "-file    SD file name with multiple structures"
			exit 0
		;;
		--version|-vers )
			echo $version
			exit 0
		;;
		--environment|-e )
			# set to 1 for later testing
			shift
			echo "--environment option doesn't work"
			exit 0
		;;
		--directory|-d )
			shift
			directory=$1
			echo directory $directory
		;;
		--pdb1|-pdb1 )
			shift
			pdb1=$1
			echo $pdb1
		;;
		--sdf1|-sdf1 )
			shift
			sdf1=$1
			echo $sdf1
		;;
		--pdb2|-pdb2 )
			shift
			pdb2=$1
			echo $pdb2
		;;
		--sdf2|-sdf2 )
			shift
			sdf2=$1
			echo $sdf2
		;;
		--ranked|-ranked )
			shift
			filename=$1
		;;
		--all|-all )
			shift
			allfiles=$1
		;;
		--sdfile|-file )
			shift
			sdfile=$1
			allfiles=$sdfile
		;;
		* ) 
			echo no command is given
		;;
	esac 

	shift

	if [ "$#" = "0" ]; then
		break  
	fi
done 

source /sw/mcm/app/prepare/prepare.bash
#prepare gold_3.0
prepare gold_2.2
prepare amber8
#prepare corina

##################################################################
######## babel ########
if [ "$pdb1" != "" ] || [ "$pdb2" != "" ] && [ "$allfiles" == "" ]; then
   prepare babel
   if [ "$pdb1" != "" ]; then	
	   echo babel -ipdb $pdb1 -osdf "sdf1.sdf"
	   babel -ipdb $pdb1 -osdf "sdf1.sdf"
#	   sdf1=$(babel -ipdb $pdb1 -osdf)
#	   echo $sdf1
	   sdf1="sdf1.sdf"
   fi
   if [ "$pdb2" != "" ]; then	
	   babel -ipdb $pdb2 -osdf "sdf2.sdf"
	   $sdf2="sdf2.sdf"
   fi
fi #babel

##################################################################
##################################################################
######## settings for smart_rms (GOLD) ########

# Simple script for starting the 'smart_rms' utility.

if [ "X$GOLD_DIR" = "X" ]
then
    echo "Environment variable GOLD_DIR not set"
    exit 1
fi

# If GOLD_DIR appears to have been set correctly,
# use it to find the 'bin' directory
if [ -r ${GOLD_DIR}/bin/gold_init_settings ] ; then
    gold_bin_dir=${GOLD_DIR}/bin
# otherwise assume this script is in the 'bin' directory
else
    gold_bin_dir=`dirname $0`
fi

# Source the standard environment settings
. ${gold_bin_dir}/gold_init_settings

setup_library_path serial

if [ "X$GOLD_FRONTEND_EXEDIR" = "X" ] ; then
    executable=${GOLD_DIR}/gold/d_$machtype/bin/smartrms_${type}
else
    executable=${GOLD_FRONTEND_EXEDIR}/smartrms_${type}
fi

##################################################################
######## smart_rms (GOLD) ########
if [ "$filename" == "" ] && [ "$allfiles" == "" ]; then   

	#echo rmsd=$\(${executable} -h $sdf1 $sdf2 \| tail -n1  \| awk \'\{ print $3\}\'\)
	rmsd=$(${executable} -h $sdf1 $sdf2 | tail -n1  | awk '{ print $3}')
	echo $rmsd

##################################################################
######## ranked ########
elif  [ "$filename" != "" ] && [ "$allfiles" == "" ]; then
	filenumb=1; compbestrmsd=99999999;sum=0;
	filemax=$( ls "ranked_"$filename"_"* |wc| awk '{ print $1}')
	until [ "$filemax" -lt "$filenumb" ]; do
		sdf2="ranked_"$filename"_"$filenumb".sdf"
		rmsd=$(${executable} -h $sdf1 $sdf2 | tail -n1  | awk '{ print $3}')
		comprmsd=`echo $rmsd '*1000' | bc -l | awk -F '.' '{ print $1; exit; }'`
		list=$list" "$rmsd
		sum=$( echo "$sum + $rmsd" |bc -l)
		if [ "$compbestrmsd" -gt "$comprmsd" ]; then 
			compbestrmsd=$comprmsd
			bestrmsd=$rmsd
			bestnumb=$filenumb
		fi
		let filenumb+=1
	done
		average=$( echo "scale=3;$sum/$filemax" |bc -l )
		echo -e $filename '\t' RMSD $bestnumb $bestrmsd '\t' average $average '\t' $list '\n' > summary_rmsd.log
		echo -e $filename '\t' RMSD $bestnumb $bestrmsd

##################################################################
##################################################################
######## allfiles ########
else
#	prepare babel
	dirnumb=0;compbestrmsd=99999999;sum=0;
	if [ "$sdfile" != "" ]; then 
#		dirmax=$( grep '^$$$$' $sdfile |wc| awk '{ print $1}')
		dirmax=1
	else
		dirmax=$( ls -d $allfiles"_m"* |wc| awk '{ print $1}')
	fi
#	dirnumb=11;dirmax=11
	echo -e $(date) '\n' > summary_rmsd.log
	echo -e 'res \t # \t directory \t\t RMSD #best \t  best \t	gold \t average \t  SD \t list' >> summary_rmsd.log

	if [ -e "best_rmsd.sdf" ]; then rm -f best_rmsd.sdf; fi
	until [ "$dirmax" -le "$dirnumb" ]; do
		let dirnumb+=1
		filenumb=1; 
		if [ "$sdfile" != "" ]; then 
			filemax=$( grep '$$$$' $sdfile |wc| awk '{ print $1}')
			echo filemax $filemax
		else
			filemax=$( ls "$allfiles"_m"$dirnumb/ranked_"$allfiles"_"* |wc| awk '{ print $1}')
		fi
		until [ "$filemax" -lt "$filenumb" ]; do
			if [ "$sdfile" != "" ]; then 
				echo select_sdf -start $filenumb -end $filenumb \< $allfiles \> sdf2.sdf
				select_sdf -start $filenumb -end $filenumb < $allfiles > sdf2.sdf
				echo antechamber -i sdf2.sdf -fi mdl -o sdf2.mol2 -fo mol2 #-j 0
				antechamber -i sdf2.sdf -fi mdl -o sdf2.mol2 -fo mol2 #-j 0
				#echo /sw/mcm/app/corina/corina_permanent_Linux2.4.lnx -i t=sdf -o t=mol2 sdf2.sdf sdf2.mol2
				#/sw/mcm/app/corina/corina_permanent_Linux2.4.lnx -i t=sdf -o t=mol2 sdf2.sdf sdf2.mol2
				mol2=sdf2.mol2
				sdf2=sdf2.sdf
				#echo $sdf2
			else
#				sdf2=$allfiles"_m"$dirnumb"/ranked_"$allfiles"_m"$dirnumb"_"$filenumb".sdf"			
				uniq $allfiles"_m"$dirnumb"/ranked_"$allfiles"_m"$dirnumb"_"$filenumb".sdf"	> sdf2.sdf
				sdf2="sdf2.sdf"		
			fi
			if [ "$filenumb" -eq "1" ] || [ "$sdfile" != "" ]; then
				residue=$(extract_prop_sdf -property_name residue < $sdf2)
#				pdb1=/home/henricsn/combine2go/data/urokinase/pdb/ligand/$residue.pdb
#				pdb1=$directory$residue.pdb
				###for using mol2 files
				#pdb1=$(grep -l $residue $directory*.mol2|awk '{ print $1}')
				#mol1=$pdb1
				###for using pdb files
				pdb1=$(grep -l $residue $directory*.pdb|awk '{ print $1}')
				if [ "$pdb1" != "" ]; then #pdb file available
					echo antechamber -i $pdb1 -fi pdb -o sdf1.mol2 -fo mol2
					antechamber -i $pdb1 -fi pdb -o sdf1.mol2 -fo mol2
					mol1=sdf1.mol2
	
					echo $residue $(basename $pdb1)
					if [ ! -e "$pdb1" ] && [ "$sdfile" == "" ]; then 
						echo break1
						break
					fi
					#echo babel -ipdb $pdb1 -osdf "sdf1.sdf"
					#babel -ipdb $pdb1 -osdf "sdf1.sdf"
					sdf1="sdf1.sdf"
				fi #pdb file available
			fi
		if [ "$pdb1" != "" ]; then #pdb file available
			echo ${executable} -h $mol1 $mol2
			rmsd=$(${executable} -h $mol1 $mol2 | tail -n1  | awk '{ print $3}')
			echo $rmsd
			if [ "$sdfile" != "" ]; then 
				echo -e $filenumb $residue $rmsd >> summary_rmsd.log
				#list=""
			fi
			comprmsd=`echo $rmsd '*1000' | bc -l | awk -F '.' '{ print $1; exit; }'`
			list=$list"  "$rmsd
			value[$filenumb]="$rmsd"
			sum=$( echo "$sum + $rmsd" |bc -l)
			if [ "$compbestrmsd" -gt "$comprmsd" ]; then 
				compbestrmsd=$comprmsd
				bestrmsd=$rmsd
				bestnumb=$filenumb
			fi
			rmsd=0;let filenumb+=1
		fi #pdb file available
		done
		
		filenumb=1;sum_variance=0; 
		
		average=$( echo "scale=3;$sum/$filemax" |bc -l )
		until [ "$filemax" -lt "$filenumb" ]; do
			variance=([$filenumb]="$( echo "($average - ${value[$filenumb]})^2" |bc -l )")
			sum_variance=$( echo "$sum_variance+${variance[$filenumb]}" |bc -l )
			let filenumb+=1
		done
		STANDARD_DEVIATION=$( echo "scale=3; sqrt($sum_variance / $filemax) " |bc -l )
		#"
		if [ "$sdfile" == "" ]; then 
#			echo -e $list '\n' >> summary_rmsd.log
#		else
			echo -e $residue "  " $dirnumb '\t'  $allfiles"_m"$dirnumb '\t' RMSD $bestnumb '\t'  $bestrmsd '\t'	${value[1]} '\t' average $average '\t'  SD $STANDARD_DEVIATION '\t'$list >> summary_rmsd.log
			echo -e $residue "  " $dirnumb '\t'  $allfiles"_m"$dirnumb '\t' RMSD $bestnumb '\t'  $bestrmsd '\t'	${value[1]} '\t' average $average '\t'  SD $STANDARD_DEVIATION '\n'
			cat $allfiles"_m"$dirnumb"/ranked_"$allfiles"_m"$dirnumb"_"$bestnumb".sdf" >> best_rmsd.sdf
		fi
		bestrmsd=0;compbestrmsd=99999999;comprmsd=0;list="";sum=0;sum_variance=0
	done

	if [ "$sdfile" == "" ]; then 
		# make one SD file with best ranked docking solutions and sort it for residue
		cat $allfiles"_m"*"/ranked_"$allfiles"_m"*_1.sdf | uniq|\
		sort_strings_sdf -prop residue  > "ranked_"$allfiles"_m_best_sort.sdf"

		# grep #_best_RMSD best_rmsd best_gold average SD
		grep $allfiles summary_rmsd.log | awk '{ print $5, $6, $7, $9, $11 } ' > short.log 
		echo
	fi
fi #ranked

rm -f sdf1.sdf sdf2.sdf
##################################################################
###for grepping just rmsd values of output file (--sdfile):
###grep 'Exper' pred.temp|awk '{ print $2,$6,$10,$14,$18,$22 } ' > pred.xls

#echo $(date)
