/*********************************************************************************/
/*Program: cut_sequence.c                                                        */
/*                                                                               */
/* Stefan Henrich                                                                */
/* cut_sequence.c                                                                */
/* Date: 18.05.2005   version 4                                                  */
/* 15.09.2005 v5: MAXPOINT 60,foutpdb, stdout                                    */
/*                                                                               */
/* Purpose:                                                                      */
/* cutting pdb files for ANAL/GOLPE based on sequence alignment of CLUSTAL W     */
/*                                                                               */
/*                                                                               */
/* argv[1] sequence file created by prep_interaction.sh                          */
/* argv[2] number of components of protein (extracting from alignment, phylib)   */
/* argv[3] number of components of ligand (extracting from alignment, phylib)    */
/*                                                                               */
/* output file: .cut.pdb                                                         */
/*                                                                               */
/*COMPILATION: cc -o cut_sequence cut_sequence.c                                 */
/*USAGE: >cut_sequence sequence.pir $(head -n1 sequence.phylib)                  */
/*********************************************************************************/
/* Das Programm soll die Anzahl der Sequenzen und deren Laenge von sequence.phy lesen
und sequence.pir lesen und in eine Matrix schreiben. Ueberall wo ein "-" steht wird 
der Sequenzbereich auf 0 gesetzt. Danach erfolgt das Lesen der PDB files und es werden nur 
die Reste rausgeschrieben, bei denen die Matrix an der Stelle !=0 ist.*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXPOINT 60 /* the sequence alignment (pir) has 50 characters in each line */


int mod_and_remainder(int i) /* return i/MAXPOINT */
{
	int mod=0;
	while(i>=MAXPOINT){i-=MAXPOINT; mod++;}
	return(mod);
}

struct pdbdata
{
	char *filename;
	char *sequencestr;
	char *inpdb;
	char *outpdb;
};

int main (int argc, char **argv)
{
	FILE *fpirseq,*finpdb,*foutpdb;
	int number=atoi(argv[2]),residue=atoi(argv[3]);
	int i,j,k,mod,length,resinumb=0;
	char line[120],templine[120],strnumber[10],strresidues[10],pdb[1000],tmppdb[MAXPOINT];
	char matrix[number+14][residue+1],character; //keine Ahnung warum das nur mit number+14 funktioniert
	char *filename,*sequencestr,*inpdb,*outpdb;
	struct pdbdata pdbfiles[number+1];
	char strseq[residue];
	char resinew[]="      ",resiold[]="      ",fill[]="      ",buffer[]="      ";

	if((fpirseq = fopen(argv[1] , "r"))==NULL) {printf("pirseq\n");exit(2);}

	mod=mod_and_remainder(residue);  
	printf("mod %i\n",mod);
	printf("Number of sequences %i\n",number);
	printf("Number of residue %i\n",residue);
	printf("Number of MAXPOINTS %i\n",MAXPOINT);

//writing '0' into first row
		filename = malloc(1);
		*filename='\0';
		pdbfiles[0].filename = filename;

		for(j=1;j<=residue;j++)	matrix[0][j]='1';


//	reading sequence.pir
	for(i=1;i<=number;i++)	// number of pdb files
	{
// read file name
// line 1
		fscanf(fpirseq,"%s\n",line);	//reads first line with sequence name
		length=strlen(line+4);
		filename = malloc(length+1);
		strcpy(filename,line+4);
		pdbfiles[i].filename = filename;
		pdbfiles[i].filename[length] = '\0';
		printf("%i 1 line %s\n%s\n",i,line,pdbfiles[i].filename);

// read sequence		
		for(j=0;j<=mod;j++) // lines of sequence
		{
			for(k=1;k<=MAXPOINT;k++) // characters of line
			{
				if((j*MAXPOINT+k)<=residue) {
					fscanf(fpirseq,"%c",); 
					printf("%c",character); 
					matrix[i][j*MAXPOINT+k]=character;
					if( character=='-' ) matrix[0][j*MAXPOINT+k]='0';
				}
			}
			fgets(line,MAXPOINT+2,fpirseq);
			printf("\n");
		}
		fgets(line,MAXPOINT+2,fpirseq); // read *
	}

///////////////
// write
///////////////				
	for(j=1;j<=residue;j++) printf("%c",matrix[0][j]); printf("\n"); //write sequence 0
	
	for(i=1;i<=number;i++)
	{
		printf("\n%s\n",pdbfiles[i].filename);
		length=strlen(pdbfiles[i].filename);

		finpdb=fopen(pdbfiles[i].filename,"r");

		outpdb = malloc(length+50);
		pdbfiles[i].outpdb=pdbfiles[i].filename;
//		strncat(pdbfiles[i].outpdb,".pdb",4);
		strncpy(pdbfiles[i].outpdb+(length-3),"cut.pdb",8);
		
//		finpdb=fopen(pdbfiles[i].inpdb,"r");
		foutpdb=fopen(pdbfiles[i].outpdb,"w");

//write sequences to stdout
		for(j=1;j<=residue;j++) 
		{
			if(matrix[0][j]=='1') 
			{printf("%c",matrix[i][j]);}
			else {printf("-");}
		}
		printf("\n");

//write lines in pdb files
		j=1;
		while(fgets(line,100,finpdb))
		{
			strncpy (templine,line,100);
			//get string of residue number and extension
			if(strncmp(line,"ATOM  ",6)==0) strncpy(resinew,line+22,5); 
			
			//jump over -
			if((strncmp(line,"ATOM  ",6)==0) && strcmp(resinew,resiold)!=0 && j<=residue)
			{				
//				fprintf(foutpdb,"0) j %i\n0) %s",j,line);		
				while(matrix[i][j]=='-' && j<=residue+1) {j++;}
//				j++;
//				fprintf(foutpdb,"1) %c %c residue %s j %i\n1) %s",matrix[0][j],matrix[i][j],resinew,j,line);		
//				fprintf(foutpdb,"1) j %i\n1) %s",j,line);		
			}
			
			//print ATOM lines of protein
			if((strncmp(line,"ATOM  ",6)==0) && j<=residue && matrix[0][j]=='1' && strcmp(resinew,resiold)!=0) 
			{
				fprintf(foutpdb,"%s",line);
//				fprintf(foutpdb,"2) %c %c residue %s j %i\n2) %s",matrix[0][j],matrix[i][j],resinew,j,line);
			}
			if((strncmp(line,"ATOM  ",6)==0) && j<=residue && matrix[0][j-1]=='1' && strcmp(resinew,resiold)==0) 
			{
				fprintf(foutpdb,"%s",line);
//				fprintf(foutpdb,"2) %c %c residue %s j %i\n2) %s",matrix[0][j],matrix[i][j],resinew,j,line);
			}

			//print non-ATOM lines
			if(strncmp(line,"ATOM  ",6)!=0)		
			{
				fprintf(foutpdb,"%s",line);
//				fprintf(foutpdb,"3) %c %c residue %s j %i\n3) %s",matrix[0][j],matrix[i][j],resinew,j,line);
			}

			
			if((strncmp(line,"ATOM  ",6)==0) && strcmp(resinew,resiold)!=0)
			{				
				j++;
			}

			//print ATOM lines, but not protein
			if((strncmp(line,"ATOM  ",6)==0) && j>residue+1) 
			{
				fprintf(foutpdb,"%s",line);
//				fprintf(foutpdb,"4) %c %c resinew %s residue %i j %i\n4)%s",matrix[0][j],matrix[i][j],resinew,residue,j,line);
			}
 
			strcpy(resiold,resinew); //store residue number for next cycle
		}
		
//		fclose(finpdb);
		strcpy(resinew,"      ");
		strcpy(resiold,"      ");
	}
//	for(j=1;j<=residue;j++) printf("%c",matrix[0][j]); printf("\n"); //write sequence 0

	fclose(fpirseq);  
	return(0);
}

