#!/usr/bin/perl
use strict;
use warnings;
use utf8;


# Ask for the input path on STDIN and open it for the header scan.
# $VCF      - the path exactly as typed (surrounding whitespace removed)
# $filename - a two-argument open() spec for that path; it is kept in this
#             form because the file is opened a second time from it later on.
print "please input the .vcf/.vcf.gz format files:\n";
my $VCF = <STDIN>;
# <STDIN> yields undef on end-of-input (Ctrl-D, closed pipe).
die("No input file name given\n") unless defined $VCF;
chomp($VCF);
$VCF =~ s/^\s+|\s+$//g;

my $filename = "<$VCF";
# Only a trailing ".gz" marks a compressed file; an unanchored match would
# also fire on ".gz" inside a directory name of a plain-text VCF.
if($VCF =~ /\.gz\z/i){
   # A pipe open succeeds as soon as the command is started, so a missing
   # file would otherwise go unnoticed here.
   die("No such file: $VCF\n") unless -e $VCF;

   # The command line is run through the shell: quote the path so that
   # spaces or shell metacharacters in it are taken literally.
   my $quoted = $VCF;
   if($^O eq 'MSWin32'){
      # NOTE(review): assumes cmd.exe-style double quoting - confirm on Windows.
      $quoted =~ s/"//g;
      $quoted = '"' . $quoted . '"';
   }
   else{
      $quoted =~ s/'/'\\''/g;
      $quoted = "'" . $quoted . "'";
   }
   $filename = "gzip -dc -- $quoted|";
}
open (VFH, $filename) or die("Can not open $VCF: $!\n");

# First pass over the VCF: read the header and stop at the first data record.
#   %key_hash    column name from the #CHROM line   -> 0-based column index
#   %format_hash FORMAT sub-field name (GT, AD, ...) -> position inside the
#                colon-separated sample field, taken from the first record
#   @head        header lines that are copied into the output file
my %key_hash=();
my %format_hash=();
my @head=();
while ( my $line = <VFH> ){           ## open vcf
	# Strip LF as well as CR/LF so the last column name carries no "\r".
	$line =~ s/\r?\n\z//;
	if($line =~/^##fileformat/){
		push @head, $line;
	}
	elsif($line =~/^##contig/){
		# "##contig=<ID=name,length=123>"  ->  "##name=123"
		$line =~s/contig=<ID=//; $line =~s/length=//; $line =~s/>//;
		$line =~s/,/=/;
		push @head, $line;
	}
	elsif($line =~/^##reference=/){
		# Kept in the output header as well. The push used to sit behind the
		# "#" of a disabled debug print on the same line, which left this
		# branch empty.
		push @head, $line;
	}
	elsif($line =~/^#CHROM/i){
		my @vcf = split ("\t", $line);
		for my $i (0 .. $#vcf){
			$key_hash{$vcf[$i]} = $i;
		}
	}
	elsif($line !~ /^#/ && $line =~ /\S/){
		# First data record. Any non-comment line counts, so contigs that are
		# not named "Chr<digits>" or "<digits>" (X, MT, scaffold_1, ...) are
		# recognised too.
		my @vcf = split ("\t", $line);
		my $f = $key_hash{"FORMAT"};
		# Without a FORMAT column there is nothing to record.
		if(defined $f && defined $vcf[$f]){
			push @head, "##FORMAT=<".$vcf[$f].">";
			my @format = split(":", $vcf[$f]);
			@format_hash{@format} = (0 .. $#format);
		}
		last;
	}
}
close VFH;

# Length of the longest column name; every listed name is padded to it so
# the listing forms aligned cells.
my $widest = 0;
for my $name (keys %key_hash) {
    my $len = length($name);
    $widest = $len if $len > $widest;
}

print "Select One Sample From below list: \n";

# Walk the columns in file order. Nothing is shown until either a column
# matching FORMAT has been passed (that column itself is not shown) or a
# column matching "Sample" is reached (that column is shown).
# $cell_no: 0 while the listing has not started, afterwards the 1-based
# number of the cell about to be printed; a line break follows every 10th.
my $cell_no = 0;
my @by_position = sort { $key_hash{$a} <=> $key_hash{$b} } keys %key_hash;
for my $name (@by_position) {
    if ($cell_no == 0) {
        if ($name =~ /FORMAT/i) {
            $cell_no = 1;
            next;
        }
        next unless $name =~ /Sample/i;
        $cell_no = 1;
    }
    print $name . (" " x ($widest - length($name))) . "\t";
    print "\n" if ($cell_no % 10) == 0;
    $cell_no += 1;
}


# Ask for a sample name until an existing sample column is entered.
# Without any known column the prompt could never be satisfied.
die("No column header (#CHROM line) was found in $VCF\n") unless %key_hash;

# Columns up to and including FORMAT are fixed VCF columns, not samples.
# When there is no column named exactly FORMAT, any known column is accepted.
my $format_idx = $key_hash{"FORMAT"};

my $snm;
while (1) {
	print "\nPleace Input Selected Sample Name: \n";
	$snm = <STDIN>;
	# End of input: stop instead of prompting forever.
	die("No sample selected (end of input)\n") unless defined $snm;
	chomp($snm);
	$snm =~ s/^\s+|\s+$//g;

	next unless exists $key_hash{$snm};
	next if (defined $format_idx && $key_hash{$snm} <= $format_idx);
	last;
}
# ---------------------------------------------------------------------------
# Second pass over the VCF: write "<sample>.vcf", a tab-separated table with
# one row per site at which the selected sample is not homozygous reference.
#   called genotype : CHROM POS REF GT AD DP GQ
#                     GT is spelled with bases ("A" when all alleles agree,
#                     "A/G" otherwise); AD is the depth of the called allele,
#                     or the summed depth of the non-reference alleles for a
#                     heterozygous call; "." marks a value that is absent.
#   missing genotype: CHROM POS followed by "+" (InDel) or "-" (SNP)
# ---------------------------------------------------------------------------
my $out_name = "$snm.vcf";

# The output is truncated before the input is read again, so writing onto
# the input path would destroy the data.
die("Output file $out_name would overwrite the input file\n") if($out_name eq $VCF);

# Column positions never change between records: look them up once.
my ($chrom_col, $pos_col, $ref_col, $alt_col, $format_col, $sample_col) =
    ($key_hash{"#CHROM"}, $key_hash{"POS"}, $key_hash{"REF"},
     $key_hash{"ALT"},    $key_hash{"FORMAT"}, $key_hash{$snm});
# CHROM is the first column of a VCF record.
$chrom_col = 0 unless defined $chrom_col;
foreach my $required (["POS", $pos_col], ["REF", $ref_col], ["ALT", $alt_col], [$snm, $sample_col]) {
    die("Column $required->[0] is missing from the #CHROM header line\n")
        unless defined $required->[1];
}
my $last_col = 0;
foreach my $col ($chrom_col, $pos_col, $ref_col, $alt_col, $sample_col) {
    $last_col = $col if($col > $last_col);
}

open(OUT, '>', $out_name) or die("can not write $out_name: $!");
print OUT join("\n", @head),"\n";
print OUT "##REF=<+:InDel missing, -:SNP missing>\n";
print OUT "#CHROM\tPOS\tREF\tGT\tAD\tDP\tGQ\n";
my $line_num =0;

# FORMAT string -> { sub-field name => position }. The layout is taken from
# each record's own FORMAT column because it may differ between records.
my %layout_of = ();

open(VFH, $filename) or die ("can not open $filename");
while (<VFH>){
    next if ($_ =~/^#/);
    s/\r?\n\z//;
    next if ($_ !~ /\S/);

    # Limit -1 keeps trailing empty fields, so columns stay aligned.
    my @vcf = split("\t", $_, -1);
    if(@vcf <= $last_col){
        warn("Skipping line $.: fewer columns than the header\n");
        next;
    }

    my $format = (defined $format_col && $vcf[$format_col] ne "") ? $vcf[$format_col] : "GT";
    my $layout = $layout_of{$format};
    if(!defined $layout){
        my @names = split(":", $format);
        my %index_of;
        @index_of{@names} = (0 .. $#names);
        $layout = $layout_of{$format} = \%index_of;
    }

    my @f  = split(":", $vcf[$sample_col]);
    my $GT = defined $layout->{"GT"} ? $f[ $layout->{"GT"} ] : undef;
    $GT = "." unless (defined $GT && $GT ne "");

    # Alleles are separated by "/" (unphased) or "|" (phased).
    my @allele = split(/[\/|]/, $GT);
    @allele = (".") unless @allele;

    # Homozygous reference (every allele is 0): nothing to report.
    next unless grep { $_ ne "0" } @allele;

    $vcf[$chrom_col] =~s/Chr//i;

    my $called = grep { $_ ne "." } @allele;
    if($called == 0){    ## Missing
        if(length($vcf[$ref_col]) > 1 or $vcf[$alt_col] =~ /[ATGC]{2,}/){
            print OUT "$vcf[$chrom_col]\t$vcf[$pos_col]\t+\n";  ## indel missing
        }
        else{
            print OUT "$vcf[$chrom_col]\t$vcf[$pos_col]\t-\n";  ## snp missing
        }
    }
    else{
        # Translate allele numbers into bases: 0 is REF, n is the n-th ALT.
        # Anything else (".", an index beyond the ALT list) becomes ".".
        my @alt  = split(",", $vcf[$alt_col]);
        my @base = ();
        foreach my $n (@allele){
            my $b = ".";
            if($n eq "0"){
                $b = $vcf[$ref_col];
            }
            elsif($n =~ /^\d+$/ && $n > 0 && defined $alt[$n - 1]){
                $b = $alt[$n - 1];
            }
            push @base, $b;
        }

        my $differing = grep { $_ ne $allele[0] } @allele;
        my $is_hom    = ($differing == 0);
        $GT = $is_hom ? $base[0] : join("/", @base);

        # Depth of the called allele, or of all distinct non-reference
        # alleles together when the call is heterozygous.
        my $AD = ".";
        my $ad_field = defined $layout->{"AD"} ? $f[ $layout->{"AD"} ] : undef;
        if(defined $ad_field){
            my @ad = split(",", $ad_field);
            my %seen;
            my @wanted = $is_hom ? ($allele[0])
                                 : grep { $_ ne "0" && !$seen{$_}++ } @allele;
            my $sum;
            foreach my $n (@wanted){
                next unless $n =~ /^\d+$/;
                my $depth = $ad[$n];
                next unless (defined $depth && $depth =~ /^\d+$/);
                $sum = 0 unless defined $sum;
                $sum += $depth;
            }
            $AD = $sum if defined $sum;
        }

        my $DP = ".";
        my $GQ = ".";
        if(defined $layout->{"DP"}){
            my $v = $f[ $layout->{"DP"} ];
            $DP = $v if (defined $v && $v ne "");
        }
        if(defined $layout->{"GQ"}){
            my $v = $f[ $layout->{"GQ"} ];
            $GQ = $v if (defined $v && $v ne "");
        }

        print OUT "$vcf[$chrom_col]\t$vcf[$pos_col]\t$vcf[$ref_col]\t$GT\t$AD\t$DP\t$GQ\n";
    }
    print "Read line: $line_num...\n" if(($line_num++%100000)==0);
}
close VFH;
# Buffered write errors only surface when the handle is closed.
close(OUT) or die("can not finish writing $out_name: $!");
 
# The result file must be completely written before it is archived:
# close the output handle if it is still open so buffered rows reach the disk.
if(defined fileno(OUT)){
    close(OUT) or die("can not finish writing $snm.vcf: $!");
}

# On Linux additionally pack the result into "<sample>.vcf.tgz".
my $os  =  $^O;
print "Current OS is $os\n";
if($os  eq  "linux"){
    # List form: tar is started directly, without a shell, so characters in
    # the sample name cannot be interpreted as shell syntax. "--" keeps a
    # name starting with "-" from being read as an option.
    my $status = system("tar", "-zcvf", "$snm.vcf.tgz", "--", "$snm.vcf");
    if($status == -1){
        warn("could not run tar: $!\n");
    }
    elsif($status != 0){
        warn("tar failed with exit status " . ($? >> 8) . "\n");
    }
}
