;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<--OGI-->;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;                                                                        ;;
;;             Center for Spoken Language Understanding                   ;;
;;        Oregon Graduate Institute of Science & Technology               ;;
;;                         Portland, OR USA                               ;;
;;                        Copyright (c) 2000                              ;;
;;                                                                        ;;
;;      This module is not part of the CSTR/University of Edinburgh       ;;
;;               release of the Festival TTS system.                      ;;
;;                                                                        ;;
;;  In addition to any conditions disclaimers below, please see the file  ;;
;;  "license_ogi_tts.txt" distributed with this software for information  ;;
;;  on usage and redistribution, and for a DISCLAIMER OF ALL WARRANTIES.  ;;
;;                                                                        ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;<--OGI-->;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;  Set up diphone voice
;;;  MWM diphones: male American English collected March 1997 by OGI

(require 'phrase)
(require 'pos)
(require 'tobi)
(require 'f2bf0lr)
(require 'ogi_worldbet_phones)
(require 'ogi_lexicons)
(require 'ogi_token)
(require 'ogi_postlex)
(require 'ogi_phrase)
(require 'ogi_lts_worldbet)
(require 'ogi_kddurtreeZ_wb)
(require 'ogi_hack)
(require 'ogi_unitsel)
(require 'ogi_synthesis)


;;;  Set up the MOBY+CMU lexicon
;;;  This call runs once, when this file is loaded, before any voice
;;;  function is invoked.
;;;  NOTE(review): setup_ogi_lex is presumably provided by the ogi_lexicons
;;;  module required above, and presumably defines the "ogi" lexicon that
;;;  voice_mwm_diphone later selects -- confirm.
(setup_ogi_lex)

;;; Use the grouped file by default
;;; init_mwm_diphone reads this variable: the value 'ungrouped selects the
;;; database built from individual files, any other value selects the
;;; group file.
(defvar mwmdi_grouped_or_ungrouped 'grouped)

;; Location at runtime
;; The directory is looked up under the key mwm_diphone in voice-locations.
;; NOTE(review): if the key is absent the lookup yields nil and the
;; path-append calls below and in init_mwm_diphone receive nil -- confirm
;; that voice-locations is always populated before this file is loaded.
(defvar mwm_diphone_dir (cdr (assoc 'mwm_diphone voice-locations))
  "mwm_diphone_dir
  The default directory for the mwm diphone database.")
;; Put this voice's "festvox" subdirectory at the front of load-path.
(set! load-path (cons (path-append mwm_diphone_dir "festvox") load-path))
 

;; Voice selection function.  Takes no arguments; works purely by side
;; effect, assigning the global variables and Parameters listed below.
;; The statements are order dependent: voice_reset runs first, and the
;; current Phrasify is saved before it is replaced.
(define (voice_mwm_diphone)
"(voice_mwm_diphone)
 Set up the current voice to be an American male MWM using
 the MWM diphone set and OGIresLPC."

  ;; reset global parameters
  (voice_reset)

  ;; Phone set
  (Parameter.set 'Language 'americanenglish)
  (Parameter.set 'PhoneSet 'worldbet)
  (PhoneSet.select 'worldbet)
  
  ;; Token to word rules
  (set! token_to_words english_token_to_words)
  (set! token_pos_cart_trees OGI_english_token_pos_cart_trees)

  ;; POS tagger
  (set! pos_lex_name "english_poslex")
  (set! pos_ngram_name 'english_pos_ngram)
  (set! pos_supported t)
  (set! guess_pos english_guess_pos)   ;; need this for accents
  (set! pos_map english_pos_map_wp39_to_wp20)
  ;(set! pos_map nil)

  ;; Lexicon selection
  (lex.select "ogi")
  
  ;; Postlexical rules (some defined in "ogi_postlex.scm")
  (set! postlex_vowel_reduce_cart_tree  OGI_postlex_vowel_reduce_cart_tree_hand) ;; must set this!
  ;; NOTE(review): the hooks are presumably applied in the order listed --
  ;; confirm before reordering.
  (set! postlex_rules_hooks (list PostLex_Reduce The_Before_Vowel postlex_apos_s_check resyllabify_hack plosive_hacks flap_hack))
  
  ;; Phrase prediction
  ;; Save the Phrasify that is in effect now, then replace it with the OGI
  ;; version.  mwmdi_voice_reset (registered at the end of this function)
  ;; assigns the saved value back to Phrasify.
  (set! festival-phrasify Phrasify)
  (set! Phrasify OGI_PuncPhrasify)
  (Parameter.set 'Phrase_Method 'prob_models)  
  (set! phr_break_params OGI_english_phr_break_params)
  
  ;; Accent and tone prediction
  (set! int_tone_cart_tree f2b_int_tone_cart_tree)
  (set! int_accent_cart_tree f2b_int_accent_cart_tree)
  
  ;; F0 prediction
  (set! f0_lr_start f2b_f0_lr_start)
  (set! f0_lr_mid f2b_f0_lr_mid)
  (set! f0_lr_end f2b_f0_lr_end)
  (Parameter.set 'Int_Method Intonation_Tree)
  ;; Target mean/std (115, 20) paired with model mean/std (170, 34).
  ;; NOTE(review): presumably used to map the f2b model's F0 range onto
  ;; this speaker's range -- confirm against Int_Targets_LR.
  (set! int_lr_params
	'((target_f0_mean 115) (target_f0_std 20)
	  (model_f0_mean 170) (model_f0_std 34)))
  (Parameter.set 'Int_Target_Method Int_Targets_LR)
  
  ;; Duration prediction
  (set! duration_cart_tree kd_duration_cart_tree_wb)
  (set! duration_ph_info kd_durs_wb)
  (Parameter.set 'Duration_Method Duration_Tree_ZScores)
  ;; Set after voice_reset above, so this is the value left in effect.
  (Parameter.set 'Duration_Stretch 1.05)

  ;; diphone unit selection fallbacks
  ;; Each entry has the form (phone (alternative ...)).
  ;; NOTE(review): presumably the unit selector substitutes the listed
  ;; alternatives when no diphone exists with `phone' on the left
  ;; (ogi_di_alt_L) or right (ogi_di_alt_R) side -- confirm in ogi_unitsel.
  ;; NOTE(review): ogi_di_alt_R contains a (p>l<s ...) entry that
  ;; ogi_di_alt_L does not; verify that the asymmetry is intended.
  (set! ogi_di_alt_L '((m= (m)) (n= (n)) (l= (l)) (h (pau)) 
		       (j (i:)) (dx (t d)) (& (^))
		       (k>9r (k)) (k>w (k)) (k>l (k))
		       (p>9r (p)) (p>w (p)) (p>l (p)) 
		       (t>9r (t)) (t>w (t)) (t>l (t)) (t>9r<s (t>9r t<s t)) 
		       (p>9r<s (p>9r p)) (t<s (t))))

  (set! ogi_di_alt_R '((m= (m)) (n= (n)) (l= (l)) (h (pau)) 
		       (j (i:)) (dx (t d)) (& (^))
		       (k>9r (k)) (k>w (k)) (k>l (k))
		       (p>9r (p)) (p>w (p)) (p>l (p)) (p>l<s (p<s p))
		       (t>9r (t)) (t>w (t)) (t>l (t)) 
		       (t>9r<s (t>9r t<s t)) (p>9r<s (p>9r p))
		       (t<s (t))))

  ;; NOTE(review): presumably the diphone used when no alternative
  ;; matches -- confirm.
  (set! ogi_di_default "pau-h")

  ;; Synthesis: select the OGIdiphone method, activate the database whose
  ;; dbname init_mwm_diphone passes to OGIdbase.init, and initialise the
  ;; residual LPC synthesizer with mwmdi_OGI_syn_params.
  (Parameter.set 'Synth_Method 'OGIdiphone)
  (OGIdbase.activate "mwm_diphone")      
  (OGIresLPC.init mwmdi_OGI_syn_params)  
  ;; Register this voice's reset function and record the active voice.
  (set! current_voice_reset mwmdi_voice_reset)
  (set! current-voice 'mwm_diphone)  
)
;; Reset function for this voice.  voice_mwm_diphone stores it in
;; current_voice_reset.  It undoes the one global override that function
;; makes outside of the usual voice variables: Phrasify is set back to the
;; value voice_mwm_diphone saved in festival-phrasify.
(define (mwmdi_voice_reset)
  (set! Phrasify festival-phrasify)
  )

;; Context-dependent smoothing tables.
;;   Each entry is (feat val int).  Any feat/val pair may be listed, as long
;;   as feat is a feature defined in the phoneset definition.
;;   The smoother setup walks the list until an entry's feat and val match,
;;   then uses that entry's int as the smoothing window length.
;;   This could be improved.
;;
;; Spectral smoothing: max number of frames to use in smoothing LPC at joins.
(set! mwmdi_smooth_spectra
      (list 'spectra_smooth
            (list '(vc + 2)      ; vowel
                  '(ctype s 0)   ; stop
                  '(ctype f 2)   ; fricative
                  '(ctype a 0)   ; affricate
                  '(ctype n 2)   ; nasal
                  '(ctype l 2)   ; lateral
                  '(ctype r 2)   ; approximant
                  )))
 ;; Voice-quality modification settings; included in mwmdi_OGI_syn_params.
 ;; All four warp factors are 1.0.
 ;; NOTE(review): 1.0 presumably means "no warping" -- confirm.
 (set! mwmdi_vqual
       (list 'vqual_mod
             (list '(vt_global_warp_wave 1.0)
                   '(vt_voiced_warp_wave 1.0)
                   '(vt_global_warp_lsf 1.0)
                   '(vt_voiced_warp_lsf 1.0))))
 
;; Power smoothing: max number of frames to use in smoothing power at joins.
;; Entries are (feat val int) triples; int is the window length for phones
;; whose feature feat has the value val.
(set! mwmdi_smooth_power
      (list 'power_smooth
            (list '(vc + 10)     ; vowel
                  '(ctype s 0)   ; stop
                  '(ctype f 2)   ; fricative
                  '(ctype a 0)   ; affricate
                  '(ctype n 5)   ; nasal
                  '(ctype l 5)   ; lateral
                  '(ctype r 5)   ; approximant
                  )))

;; Debug dump settings: (item filename) pairs.
;; Only takes effect if mwmdi_dump is included in mwmdi_OGI_syn_params,
;; where it is currently commented out.
(set! mwmdi_dump
      (list 'dump
            (list '(srcseg  "srcseg.phn")
                  '(srcexc  "srcexc")             ;; dumps .wav and joins (.phn)
                  '(srcpm   "srcpm.est")          ;; (exc pmarks)
                  '(srcpm_as_wav "srcpm.wav")
                  '(srclpc  "srclpc.est")         ;; (LSF, E, etc.)
                  '(targexc "targexc.wav")
                  '(targseg "targseg.phn")
                  '(targjoin "targjoin.phn")
                  '(targpm  "targpm.est")
                  '(targpm_as_wav "targpm.wav")
                  '(targlpc "targlpc.est")        ;; (LSF, E, etc.)
                  '(output  "out.wav")
                  '(chunks  "chunk")              ;; dumps .wav and .phn
                  )))

;;
;;  Parameter list for the residual LPC synthesizer.
;;  voice_mwm_diphone passes this list to OGIresLPC.init.
;;
(set! mwmdi_OGI_syn_params
      (append
       ;; Fixed (name value) settings.
       '((F0_default 50.0)                   ;; default Fo used if can't find any Fo targets
         (T0_UV_thresh 0.020)                ;; used to make V/UV decision
         (T0_UV_pm 0.010)                    ;; used to place UV pmarks
         (post_gain 1.0)                     ;; adjust final loudness
         (deemphasis 0.94)                   ;; opposite of preemphasis (lowpass filter)
         (mod_method "direct")               ;; method for realizing prosodic targets
         ;; (mod_method "none")              ;; alternative setting, disabled
         (beta_smooth 5)                     ;; smoother len for pitch mod factor in "soft" mod method
         (smooth_cross_ph_join "Y")          ;; smooth across joins at phone boundaries?
         (window_type "trapezoid")           ;; ola window
         (spectra_match_or_replace "match")  ;; for join smoothing
         (power_match_or_replace   "match")  ;; for join smoothing
         )
       ;; Settings held in the variables defined earlier in this file.
       (list mwmdi_smooth_spectra
             mwmdi_smooth_power
             mwmdi_vqual
             ;; mwmdi_dump                   ;; uncomment to include the dump settings
             )))


;; Registers the MWM diphone database with OGIdbase.init.
;; Takes no arguments.  Reads mwmdi_grouped_or_ungrouped and mwm_diphone_dir.
;; Returns whatever OGIdbase.init returns.
;;
;; The database is initialised from individual files when
;; mwmdi_grouped_or_ungrouped is 'ungrouped, and from the group file for
;; any other value.  Only the parameter list differs between the two cases,
;; so the list is chosen first and OGIdbase.init is called once.
;;
;; The earlier version had a fourth operand, t, inside the if form.  SIOD's
;; if only uses the test, then and else operands, so it was never evaluated;
;; it has been removed.
(define (init_mwm_diphone)
"(init_mwm_diphone)
  Initialise the MWM diphone database.  This sets up the 16K version
  for residual excited LPC."
  (OGIdbase.init
   (if (equal? mwmdi_grouped_or_ungrouped 'ungrouped)

       ;;  ungrouped
       (list
        '(dbname mwm_diphone)
        (list 'unitdic_file (path-append mwm_diphone_dir "festvox" "mwmdiph.ms"))
        (list 'gain_file (path-append mwm_diphone_dir "festvox" "mwmgain.dat"))
        '(phoneset "worldbet")
        ;; NOTE(review): hard-coded absolute path from the original
        ;; development machine; the ungrouped mode only works where this
        ;; directory exists.
        '(base_dir "/u/macon/TTS/tts_data/mwm/")
        '(lpc_dir "lpc16/")
        '(lpc_ext ".lpc")
        '(exc_dir "lpc16/")
        '(exc_ext ".res")
        '(pm_dir "pm/")
        '(pm_ext ".pmv")
        '(data_type "resLPC")
        '(access_mode "ondemand")
        '(samp_freq 16000)
        '(sig_band 0.010)
        '(isCompressed "Y") ;; if "Y", compress when saving group file
        '(preemph 0.96)
        )

       ;;  grouped -- parameters set here override those in groupfile
       (list
        '(dbname mwm_diphone)
        (list 'groupfile (path-append mwm_diphone_dir "group" "mwm_di_resLPC.group"))
        '(access_mode "ondemand")
        ))))

;; Initialise the database once, when this file is loaded.
;; voice_mwm_diphone later activates it by name with OGIdbase.activate.
(init_mwm_diphone)


;; Declare this voice and its descriptive attributes (language, gender,
;; dialect, free-text description).
;; NOTE(review): presumably this is what makes the voice appear in
;; Festival's list of available voices -- confirm.
(proclaim_voice
 'mwm_diphone
 '((language english)
   (gender male)
   (dialect american)
   (description
    "This voice provides an American English male voice using a
     residual excited LPC diphone synthesis module created at
     OGI.  It uses a lexicon compiled from MOBY and CMU lexicons, and
     other trained modules used by CSTR voices.")))

;; Mark the module as loaded under the name mwm_diphone, the counterpart of
;; the (require '...) calls at the top of this file.
(provide 'mwm_diphone)





