#! /usr/bin/env ruby


# Command used to launch MAFFT. It is spliced into the shell command lines
# passed to system() further down, so the bare name "mafft" is looked up by
# the shell.
# The commented-out alternative points at "_BINDIR/mafft" -- presumably a
# placeholder replaced with the real install directory at build time; verify.
#mafftpath = "_BINDIR/mafft"
mafftpath = "mafft"

require 'getopts'
require 'tempfile'


# Defaults; each can be overridden by the command-line option noted.
seed = 0                                         # -s : seed handed to srand
nout = 200                                       # -p : number of sequences placed in the core alignment
infn = ""                                        # -i : input FASTA file (required)
coreoptions = " --globalpair --maxiterate 100 "  # -c : mafft options for the core alignment
addoptions = " "                                 # -a : mafft options for the --add step

# getopts returns nil when the command line cannot be parsed (this includes
# -h, which is not a declared option), so any bad invocation shows the usage.
# The usage goes to STDERR because STDOUT carries the resulting alignment.
if getopts( "s:", "p:", "i:", "c:", "a:" ) == nil || $OPT_h then
	STDERR.puts "Usage: #{$0} -i inputfile [-s seed] [-p number_of_core_sequences] [-c\"[options for mafft (core)]\"] [-a\"[options for mafft (add)]\"]"
	exit 1
end

# Numeric options are converted with to_i, so non-numeric text becomes 0.
seed = $OPT_s.to_i if $OPT_s
nout = $OPT_p.to_i if $OPT_p
infn = $OPT_i if $OPT_i
coreoptions = $OPT_c if $OPT_c
addoptions = $OPT_a if $OPT_a


# The input file is the only mandatory argument.
if infn == "" then
	STDERR.puts "Give input file with -i."
	exit 1
end

# Scratch files:
#   temp_uf - sequences selected for the core alignment
#   temp_nf - sequences not selected (added to the core afterwards)
#   temp_cf - core alignment produced by mafft
#   temp_of - final alignment
#
# The Tempfile objects are kept in `tempfiles` for the whole run. A Tempfile
# that is no longer referenced may be garbage-collected, and its finalizer
# unlinks the file -- which could delete an intermediate result between the
# steps below.
tempfiles = [ "_uf", "_nf", "_cf", "_of" ].map { |prefix| Tempfile.new( prefix ) }
temp_uf, temp_nf, temp_cf, temp_of = tempfiles.map { |tf| tf.path }
# Only the paths are used from here on (the files are reopened by name and
# written through shell redirection), so release our own handles.
tempfiles.each { |tf| tf.close }



# Seed the generator so the random core selection is reproducible.
srand( seed )

# Reads FASTA records from fp.
#
# fp   - object responding to gets (e.g. an open File); read until gets
#        returns nil.
# name - Array; for every line starting with ">" the rest of the line, with
#        surrounding whitespace stripped, is appended.
# seq  - Array; for every record the concatenation of its sequence lines
#        (each stripped of surrounding whitespace) is appended.
#
# Lines that appear before the first ">" header are discarded.
# When the input holds no header at all nothing is appended to seq, so name
# and seq always end up with the same number of new entries.
#
# Returns the number of records read.
def readfasta( fp, name, seq )
        nseq = 0
        tmpseq = "".dup
        while ( line = fp.gets )
                if line.start_with?( ">" )
                        # A new header closes the previous record, if any.
                        seq.push( tmpseq ) if nseq > 0
                        name.push( line.sub(/>/,"").strip )
                        nseq += 1
                        tmpseq = "".dup
                else
                        # Append in place instead of reallocating with +=.
                        tmpseq << line.strip
                end
        end
        # Flush the last record; skip when there was no header at all.
        seq.push( tmpseq ) if nseq > 0
        return nseq
end


# Load every record of the input file: tname receives the headers, tseq the
# sequences, and tin the number of records.
tname = []
tseq = []
infp = File.open( infn, "r" )
tin = readfasta( infp, tname, tseq )
infp.close
# Last sequence of the input (not referenced again in the visible script).
ref = tseq[tin-1]

# sai[k] is 1 when record k belongs to the core alignment, 0 otherwise.
if nout < tin
	# More records than the core can hold: draw nout distinct indices,
	# redrawing whenever an already chosen index comes up.
	sai = Array.new( tin, 0 )
	nout.times do
		pick = rand( tin )
		pick = rand( tin ) until sai[pick] == 0
		sai[pick] = 1
	end
else
	# Everything fits into the core.
	sai = Array.new( tin, 1 )
end



# Split the input into two FASTA files: records flagged 1 in sai go to
# temp_uf (core), all others to temp_nf (to be added later).
File.open( temp_uf, 'w' ) do |core_out|
	File.open( temp_nf, 'w' ) do |rest_out|
		tin.times do |k|
			target = ( sai[k] == 1 ) ? core_out : rest_out
			target.puts ">" + tname[k]
			target.puts tseq[k]
		end
	end
end


# Run MAFFT, print the resulting alignment on STDOUT and remove the scratch
# files. system() returns false (non-zero exit) or nil (command could not be
# run) on failure; in that case stop with exit status 1 instead of printing
# an empty result together with a success message.
begin
	# Step 1: align the selected sequences to build the core MSA.
	unless system( mafftpath + " #{coreoptions} #{temp_uf} > #{temp_cf}" )
		STDERR.puts "mafft failed while building the core MSA."
		exit 1
	end

	if nout < tin
		# Step 2: add the sequences that were left out of the core.
		unless system( mafftpath + " #{addoptions} --add #{temp_nf} #{temp_cf} > #{temp_of}" )
			STDERR.puts "mafft failed while adding the remaining sequences to the core MSA."
			exit 1
		end
		result = temp_of
	else
		# Every sequence is already in the core; it is the final result.
		result = temp_cf
	end

	STDOUT.write( File.read( result ) )

	if nout < tin
		STDERR.puts "Randomly selected #{nout} sequences were aligned with G-INS-i to generate a core MSA, and the remaining sequences were added to the core MSA."
	else
		STDERR.puts "All #{tin} sequences were aligned in a single step; there were no remaining sequences to add."
	end
ensure
	# Runs on success, on the exits above and on exceptions alike.
	[ temp_nf, temp_cf, temp_of, temp_uf ].each do |path|
		File.delete( path ) if File.exist?( path )
	end
end
