#!/bin/bash

####################################################################################### 
#              REVERB  CHALLENGE -  automatic speech recognition                      # 
#                                                                                     # 
# scripts and tools written by:                                                       # 
# - Volker Leutnant,                                                                  # 
# - Marc Puels,                                                                       # 
# - Reinhold Haeb-Umbach                                                              # 
#                                                                                     # 
# Department of Communications Engineering, University of Paderborn, Germany          # 
#                                                                                     # 
# support: reverb-asr@lab.ntt.co.jp                                                   #
#######################################################################################

# split the list of input files into smaller subsets and process these subsets in parallel;
# the number of parallel jobs is passed as the first argument (e.g. the number of available CPU cores)
# e.g.
#  HCopy \
# 	-A -D -T 1 \
# 	-C ${CONFIG_HCOPY_COMMON} \
# 	-C ${CONFIG_HCOPY_MCWSJAV} \
# 	-S code.scp
# is now called by 
# parallelHTK ${NumThreads} HCopy \
# 	-A -D -T 1 \
# 	-C ${CONFIG_HCOPY_COMMON} \
# 	-C ${CONFIG_HCOPY_MCWSJAV} \
# 	-S code.scp
# this simple wrapper thus makes it easy to switch the code back to the single-threaded version

#######################################
# parallelHTK <numProcesses> <HTKprogram> [arguments...]
#
# Runs <HTKprogram> with the given arguments. For HCopy, HVite and HERest
# with <numProcesses> other than 1, the script file passed with -S is split
# into at most <numProcesses> chunks which are processed concurrently:
#   HCopy  - every chunk is handled by its own HCopy process.
#   HVite  - every process writes its own partial MLF ("<mlf>_<chunk>");
#            the partial MLFs are concatenated into the MLF given with -i.
#   HERest - every process dumps accumulators (-p N, N >= 1) into the -M
#            directory; a final "-p 0" pass merges them.
# Every other program, or <numProcesses> == 1, is executed unchanged as a
# single process. The same fallback is used whenever the call cannot be
# parallelised (no/unreadable/empty -S file, invalid process count, HERest
# without -M).
#
# All arguments are forwarded verbatim (no re-splitting, no glob expansion),
# so "-l '*'" reaches HTK as the single argument "*".
#
# Outputs: a short trace on stdout, diagnostics on stderr.
# Returns: the exit status of the program in single-process mode; in
#          multi-process mode non-zero if any worker or the merge step failed.
#######################################

# Print a diagnostic message on stderr.
phtk_warn() {
  printf 'parallelHTK: %s\n' "$*" >&2
}

# Run the command given as arguments unchanged, as one process.
phtk_run_single() {
  echo "Single process"
  "$@"
}

parallelHTK_main() {
  if (( $# < 2 )); then
    # Nothing to run. Status 0 is kept for compatibility with the historic
    # behaviour of an empty invocation.
    printf 'Usage: %s <numProcesses> <HTKprogram> [arguments...]\n' "${0##*/}" >&2
    return 0
  fi

  # Trace the invocation.
  echo "$@"
  local nbProc=$1
  shift
  # The HTK program to be called is the second argument.
  local prog=$1
  echo "$prog"

  # Only these tools can be parallelised; anything else runs as given.
  case "$prog" in
    HCopy | HVite | HERest) ;;
    *)
      phtk_run_single "$@"
      return
      ;;
  esac
  if [[ "$nbProc" == "1" ]]; then
    phtk_run_single "$@"
    return
  fi

  # Keep the complete command for the single-process fallback.
  local -a fullCmd=("$@")
  shift

  local scpFile=''        # -S: list of files to process
  local outputMLF=''      # -i: output MLF (HVite)
  local targetMMFDir=''   # -M: directory receiving the new MMF (HERest)
  local statsFile=''      # -s: statistics file (HERest, merge pass only)
  local hmmList=''        # HERest: the last argument is the HMM list
  local -a sourceOpts=()  # every "-H <mmf>" pair (HERest)
  local -a passArgs=()    # everything that is forwarded untouched

  if [[ "$prog" == "HERest" ]] && (( $# > 0 )); then
    hmmList=${!#}
  fi

  # Pull the options that need per-process values out of the argument list.
  local expect='' arg
  for arg in "$@"; do
    if [[ -n "$expect" ]]; then
      case "$expect" in
        -S) scpFile=$arg ;;
        -i) outputMLF=$arg ;;
        -s) statsFile=$arg ;;
        -M) targetMMFDir=$arg ;;
        -H) sourceOpts+=(-H "$arg") ;;
      esac
      expect=''
      continue
    fi
    case "$prog/$arg" in
      HCopy/-S | HVite/-S | HVite/-i | HERest/-S | HERest/-s | HERest/-M | HERest/-H)
        expect=$arg
        ;;
      *)
        passArgs+=("$arg")
        ;;
    esac
  done

  # Decide whether the call can be parallelised at all.
  local reason='' total=0
  if ! [[ "$nbProc" =~ ^[1-9][0-9]*$ ]]; then
    reason="invalid number of processes '$nbProc'"
  elif [[ -z "$scpFile" ]]; then
    reason='no -S script file given'
  elif [[ ! -f "$scpFile" || ! -r "$scpFile" ]]; then
    reason="script file '$scpFile' is not readable"
  elif [[ "$prog" == "HERest" && -z "$targetMMFDir" ]]; then
    reason='HERest needs -M to locate the accumulators'
  else
    # awk also counts a last line that lacks the trailing newline.
    total=$(awk 'END { print NR }' < "$scpFile")
    if (( total == 0 )); then
      reason="script file '$scpFile' is empty"
    fi
  fi
  if [[ -n "$reason" ]]; then
    phtk_warn "$reason; running $prog as a single process"
    phtk_run_single "${fullCmd[@]}"
    return
  fi

  echo "Multi processes"

  # Ceiling division: never 0 lines per chunk, never more than nbProc chunks.
  local perChunk=$(( (total + nbProc - 1) / nbProc ))

  # Split directly into a private directory next to the script file, so no
  # files of the caller are touched and concurrent runs cannot collide.
  local scpDir scpBase tmpDir
  scpDir=$(dirname -- "$scpFile")
  scpBase=$(basename -- "$scpFile")
  if ! tmpDir=$(mktemp -d "$scpDir/parallelHTK.XXXXXX"); then
    phtk_warn "cannot create a temporary directory in '$scpDir'"
    return 1
  fi
  if ! split -l "$perChunk" -- "$scpFile" "$tmpDir/$scpBase.tmp"; then
    phtk_warn "cannot split '$scpFile'"
    rm -rf -- "$tmpDir"
    return 1
  fi
  local -a chunks=("$tmpDir/$scpBase.tmp"*)

  # Start one worker per chunk. The workers run in the caller's working
  # directory, so relative paths behave exactly as in single-process mode.
  local -a pids=() partials=() mlfOpt=()
  local count=0 chunk subTask
  for chunk in "${chunks[@]}"; do
    subTask=${chunk##*/}
    count=$((count + 1))
    case "$prog" in
      HCopy)
        HCopy "${passArgs[@]}" -S "$chunk" &
        ;;
      HVite)
        mlfOpt=()
        if [[ -n "$outputMLF" ]]; then
          mlfOpt=(-i "${outputMLF}_${subTask}")
          partials+=("${outputMLF}_${subTask}")
        fi
        HVite -S "$chunk" "${mlfOpt[@]}" "${passArgs[@]}" &
        ;;
      HERest)
        HERest -S "$chunk" "${sourceOpts[@]}" -M "$targetMMFDir" -p "$count" "${passArgs[@]}" &
        ;;
    esac
    pids+=("$!")
  done

  # Wait for the processes to join and remember any failure.
  local status=0 pid rc
  for pid in "${pids[@]}"; do
    rc=0
    wait "$pid" || rc=$?
    if (( rc != 0 )); then
      phtk_warn "$prog worker (pid $pid) exited with status $rc"
      status=$rc
    fi
  done

  # Join the outputs if necessary (best effort, even after a worker failure).
  if [[ "$prog" == "HVite" && -n "$outputMLF" ]]; then
    # Concatenate the partial MLFs, in chunk order, into the requested MLF.
    # NOTE(review): as before, every partial file contributes its own MLF
    # header line to the joined file.
    rm -f -- "$outputMLF"
    cat -- "${partials[@]}" > "$outputMLF" || status=1
    # Remove only the partial MLFs created above.
    rm -f -- "${partials[@]}"
  elif [[ "$prog" == "HERest" ]]; then
    # Merge the accumulators dumped by the workers.
    # NOTE(review): as before, only -H, -M and -s are handed to the merge
    # pass; other options (e.g. -C, -u, -v) are not - confirm this is intended.
    local -a accs=("$targetMMFDir"/*.acc)
    local -a statsOpt=()
    if [[ -n "$statsFile" ]]; then
      statsOpt=(-s "$statsFile")
    fi
    HERest "${sourceOpts[@]}" -M "$targetMMFDir" "${statsOpt[@]}" -p 0 "$hmmList" "${accs[@]}" || status=$?
    # Remove the accumulators.
    rm -f -- "${accs[@]}"
  fi

  # Remove the temporary directory holding the chunk lists.
  rm -rf -- "$tmpDir"
  return "$status"
}

parallelHTK_main "$@"