#!/bin/bash -e

####################################################################################### 
#              REVERB  CHALLENGE -  automatic speech recognition                      # 
#                                                                                     # 
# scripts and tools written by:                                                       # 
# - Volker Leutnant,                                                                  # 
# - Marc Puels,                                                                       # 
# - Reinhold Haeb-Umbach                                                              # 
#                                                                                     # 
# Department of Communications Engineering, University of Paderborn, Germany          # 
#                                                                                     # 
# support: reverb-asr@lab.ntt.co.jp                                                   #
#######################################################################################

. printlib


# Abort the script when a required file is missing, so that a single clear
# message is printed instead of the screen being flooded by follow-up errors.
# Arguments: $1 - path that must exist as a regular file
# Outputs:   reports the missing path through print_msg
# Returns:   0 if the file exists; otherwise exits the shell with status 1
function stop_on_missing_file
{
  # The path must be quoted: unquoted, an empty argument collapses the test to
  # `[ ! -f ]` (false, so the check silently passes) and a path containing
  # whitespace makes `[` fail with a syntax error (again skipping the abort).
  if [[ ! -f "$1" ]]; then
    print_msg "Cannot find $1! Aborting"
    exit 1
  fi
}

print_header "$0"

print_subsec 'Configuration'
# Site-specific settings are read from LOCAL_CONFIG (presumably the paths used
# further below - confirm against LOCAL_CONFIG.template). Without that file the
# script cannot continue, so tell the user how to create it and stop.
if [[ ! -e LOCAL_CONFIG ]]; then
    print_msg 'Copy LOCAL_CONFIG.template to LOCAL_CONFIG and adapt paths as needed.'
    exit 1
fi
source LOCAL_CONFIG

# tmpDir=`mktemp -d -p ${WORKPATH}`
# pushd . > /dev/null
# cd $tmpDir


# Reference MLF used for scoring. There is a single one in the WSJLIB folder;
# per the original authors it is created by mcwsjav_prepare_transcriptions.
refMLF=${WSJLIB}/wlabs/mcwsjav/MC_WSJ_AV.mlf

# Refuse to run without a results directory: with an empty value the result
# files below would be written relative to "/" and `mkdir -p` would be called
# without an operand.
: "${MCWSJAVRESULTS_MC:?MCWSJAVRESULTS_MC must be set to the results directory}"

# Ask before reusing an existing results directory, otherwise create it.
if [[ -d "${MCWSJAVRESULTS_MC}" ]]; then
  print_msg "The results directory\n" \
            "${MCWSJAVRESULTS_MC}\n" \
            "already exists!\n " \
            "If you decide to proceed, its content will possibly be overwritten!"
  # -r keeps backslashes in the answer literal. `|| true` stops errexit from
  # killing the script silently when stdin hits end-of-input; whatever was read
  # so far (possibly nothing) is then evaluated like any other answer.
  read -r -p "Continue? (Y/N)? " REPLY || true
  if [[ "${REPLY}" == "Y" || "${REPLY}" == "y" ]]; then
    print_msg "Continuing!"
  else
    print_msg "Exiting!"
    exit 1
  fi
else
  mkdir -p -- "${MCWSJAVRESULTS_MC}"
fi

# --- Decoder settings (handed to HVite as -t / -v / -s / -p in the loop below) ---
beamPruningThreshold='250.0'
wordEndPruningThreshold='200.0'
languageModelWeight='14.0'
insertionPenalty='0.0'

# --- Acoustic model selection ---
# numberOfGaussians and condition only pick the model directory used for MMF.
numberOfGaussians='12'
condition='reverb'
# word-internal triphone list (after decision-tree based clustering)
HMMLIST="${REVERBWSJHMMS}/W_${condition}/winttree.list"
# master macro file; note the literal "4" appended after the Gaussian count
MMF="${REVERBWSJHMMS}/W_${condition}/hmm${numberOfGaussians}4/MMF"

# --- Language resources ---
# language model network
LM="${WSJLIB}/nets/5cnvp.net"
# monophone dictionary
VOCAB="${WSJLIB}/dicts/5cnvpmono.dict"

# --- What to recognise ---
# recognition tasks (space-separated list)
tasks='near far'
# test sets (space-separated list taken from TASKSET, e.g. "dt et")
testSets="${TASKSET}"
# directory holding the feature file lists
flistDir="${WSJLIB}/flists/mcwsjav"

# Recognise and score every combination of recognition task and test set.
# $tasks and $testSets are intentionally left unquoted: both hold
# space-separated word lists that the shell has to split.
for task in $tasks; do
  for testSet in $testSets; do
    taskName=RealData_${testSet}_for_1ch_${task}_room1_A

    # list of feature files for this task / test set
    featureFilesList=${flistDir}/${taskName}.lst

    # The recogniser output (.mlf) and the scoring log (.log) are placed in the
    # results directory under the base name of the feature file list.
    listName=${featureFilesList##*/}
    resMLF=${MCWSJAVRESULTS_MC}/${listName%lst}mlf
    resLog=${MCWSJAVRESULTS_MC}/${listName%lst}log

    print_subsub "Recognize data set $testSet using feature file list:\n ${featureFilesList}"
    # call HVite (through parallelHTK) with the configured parameters
    parallelHTK "${NBPROC}" HVite \
      -A -D -T 1 \
      -t "${beamPruningThreshold}" \
      -v "${wordEndPruningThreshold}" \
      -s "${languageModelWeight}" \
      -p "${insertionPenalty}" \
      -i "${resMLF}" \
      -w "${LM}" \
      -S "${featureFilesList}" \
      -H "${MMF}" \
      "${VOCAB}" \
      "${HMMLIST}"

    # stop here if the recogniser did not produce its output MLF
    stop_on_missing_file "${resMLF}"

    print_msg "Evaluate using reference MLF: \n ${refMLF}"

    # Score the recogniser output against the reference MLF.
    # Known minor issue (accepted by the original authors): some speakers have
    # a two-digit ID and some a one-digit ID, so in the log file speaker T10
    # shows up as "10" while speaker T6 shows up as "6c".
    # The speaker mask is quoted so that the shell passes it on literally
    # instead of expanding it against file names in the working directory.
    HResults \
      -A -D -T 1 \
      -h \
      -n \
      -k '*_T%%*' \
      -z \!NULL \
      -e \!NULL \!SENT_START \
      -e \!NULL \!SENT_END \
      -I "${refMLF}" \
      "${VOCAB}" \
      "${resMLF}" \
      > "${resLog}"

    # stop here if the scoring log is missing
    stop_on_missing_file "${resLog}"
  done
done
# popd > /dev/null
# rm -rf $tmpDir
