Kaldi 解碼器詳解 2(實時流)

下面是對kaldi音頻文件解碼的改造,用于實時流解碼。

// Decodes one segment (utterance) of audio pulled from this->audio_source.
//
// A fresh feature pipeline, nnet3 decoder and silence-weighting object are
// built for every call.  The function then loops: read one chunk, feed it to
// the feature pipeline, optionally update the i-vector frame weights, and
// advance the decoder.  The loop ends when the audio source reports that no
// more data is available, or, if this->do_endpointing is set, when the decoder
// reports an endpoint.  While looping, a partial result is emitted through
// DecodePartialResult() each time more than traceback_period_secs of audio
// has been decoded since the previous traceback mark.
//
// After the loop, if more than 0.1 seconds were decoded, the decoder is
// finalized and the lattice is handed to DecoderFinalResult(); otherwise the
// segment is discarded.
//
// Parameters:
//   more_data              [out] Result of the last audio_source->Read() call;
//                          false once the source has reported no more data.
//   chunk_length           Dimension of the buffer passed to Read(); it is
//                          treated as a sample count when converting to
//                          seconds (Dim() / sample_rate).
//   traceback_period_secs  Amount of decoded audio, in seconds, between two
//                          partial-result tracebacks.
//
// NOTE(review): the function is declared `static` but dereferences `this`
// throughout; that only compiles as a non-static member function.  The
// enclosing class is not visible here, so the signature is left untouched --
// confirm against the class declaration.
static void DecodeSegment(bool &more_data, int32 chunk_length,
                            BaseFloat traceback_period_secs) {

  // Per-segment state: the pipeline starts from the adaptation state carried
  // over in this->adaptation_state.
  OnlineNnet2FeaturePipeline feature_pipeline(*(this->feature_info));
  feature_pipeline.SetAdaptationState(*(this->adaptation_state));
  SingleUtteranceNnet3Decoder decoder(*(this->decoder_opts),
                                      *(this->trans_model),
                                      *(this->decodable_info_nnet3),
                                      *(this->decode_fst),
                                      &feature_pipeline);
  OnlineSilenceWeighting silence_weighting(*(this->trans_model),
          *(this->silence_weighting_config));

  Vector<BaseFloat> wave_part(chunk_length);
  std::vector<std::pair<int32, BaseFloat> > delta_weights;
  BaseFloat last_traceback = 0.0;
  BaseFloat num_seconds_decoded = 0.0;
  while (true) {
    more_data = this->audio_source->Read(&wave_part);

    // The chunk is always passed on, including the one read when the source
    // reports that it is exhausted.
    feature_pipeline.AcceptWaveform(this->sample_rate, wave_part);
    if (!more_data) {
      feature_pipeline.InputFinished();
    }

    // Only re-weight frames when silence weighting is enabled and the
    // pipeline actually has an i-vector feature.
    if (silence_weighting.Active() &&
        feature_pipeline.IvectorFeature() != NULL) {
      silence_weighting.ComputeCurrentTraceback(decoder.Decoder());
      silence_weighting.GetDeltaWeights(feature_pipeline.IvectorFeature()->NumFramesReady(),
                                        &delta_weights);
      feature_pipeline.IvectorFeature()->UpdateFrameWeights(delta_weights);
    }

    decoder.AdvanceDecoding();
    // Track decoded time both for this segment and across segments.
    num_seconds_decoded += 1.0 * wave_part.Dim() / this->sample_rate;
    this->total_time_decoded += 1.0 * wave_part.Dim() / this->sample_rate;

    if (!more_data) {
      break;
    }
    if (this->do_endpointing
        && (decoder.NumFramesDecoded() > 0)
        && decoder.EndpointDetected(*(this->endpoint_config))) {
      break;
    }

    // Emit a partial hypothesis once enough audio has been decoded since the
    // last traceback mark.  The mark advances by one period rather than
    // jumping to the current time.
    if ((num_seconds_decoded - last_traceback > traceback_period_secs)
        && (decoder.NumFramesDecoded() > 0)) {
      Lattice lat;
      decoder.GetBestPath(false, &lat);
      DecodePartialResult(lat);
      last_traceback += traceback_period_secs;
    }
  }

  if (num_seconds_decoded > 0.1) {
    decoder.FinalizeDecoding();
    CompactLattice clat;
    bool end_of_utterance = true;
    decoder.GetLattice(end_of_utterance, &clat);

    int32 num_words = 0;
    DecoderFinalResult(clat, &num_words);
    if (num_words >= this->min_words_for_ivector) {
      // Only update adaptation state if the utterance contained enough words
      feature_pipeline.GetAdaptationState(this->adaptation_state);
    }
  } else {
    // KALDI_VLOG takes a verbosity level; the message is streamed into it.
    KALDI_VLOG(1) << "Less than 0.1 seconds decoded, discarding";
  }
}
©著作權歸作者所有,轉載或內容合作請聯繫作者
【社區內容提示】社區部分內容疑似由AI輔助生成,瀏覽時請結合常識與多方信息審慎甄別。
平臺聲明:文章內容(如有圖片或視頻亦包括在內)由作者上傳並發布,文章內容僅代表作者本人觀點,簡書係信息發布平臺,僅提供信息存儲服務。

相關(guān)閱讀更多精彩內(nèi)容

友情鏈接更多精彩內(nèi)容