Quellcode durchsuchen

添加语音转文字 SDK 要使用文件,TODO

jsonwang vor 3 Jahren
Ursprung
Commit
aa85dfc557

+ 6 - 3
Introduce.xcodeproj/project.pbxproj

@@ -3,10 +3,11 @@
 	archiveVersion = 1;
 	classes = {
 	};
-	objectVersion = 45;
+	objectVersion = 51;
 	objects = {
 
 /* Begin PBXBuildFile section */
+		419CF3B7276111DC00C73BDA /* Resources.bundle in Resources */ = {isa = PBXBuildFile; fileRef = 419CF3B6276111DC00C73BDA /* Resources.bundle */; };
 		4A426DDD275484DF00B3733B /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 4A426DDC275484DF00B3733B /* AppDelegate.swift */; };
 		4A426DE4275484DF00B3733B /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 4A426DE2275484DF00B3733B /* Main.storyboard */; };
 		4A426DE6275484E000B3733B /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 4A426DE5275484E000B3733B /* Assets.xcassets */; };
@@ -22,6 +23,7 @@
 /* Begin PBXFileReference section */
 		3051DB0957784182F3E1AAD3 /* Pods-Introduce.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-Introduce.release.xcconfig"; path = "Target Support Files/Pods-Introduce/Pods-Introduce.release.xcconfig"; sourceTree = "<group>"; };
 		3D67E85C017389B87957100F /* Pods_Introduce.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Pods_Introduce.framework; sourceTree = BUILT_PRODUCTS_DIR; };
+		419CF3B6276111DC00C73BDA /* Resources.bundle */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.plug-in"; path = Resources.bundle; sourceTree = "<group>"; };
 		4A426DD9275484DF00B3733B /* Introduce.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = Introduce.app; sourceTree = BUILT_PRODUCTS_DIR; };
 		4A426DDC275484DF00B3733B /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = "<group>"; };
 		4A426DE3275484DF00B3733B /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = "<group>"; };
@@ -92,6 +94,7 @@
 		4A426DF02754A0D100B3733B /* Aarchitecture */ = {
 			isa = PBXGroup;
 			children = (
+				419CF3B6276111DC00C73BDA /* Resources.bundle */,
 				4A426DDC275484DF00B3733B /* AppDelegate.swift */,
 				4A426DF12754A1E300B3733B /* INTabbarController.swift */,
 			);
@@ -161,7 +164,6 @@
 		4A426DD1275484DF00B3733B /* Project object */ = {
 			isa = PBXProject;
 			attributes = {
-				BuildIndependentTargetsInParallel = 1;
 				LastSwiftUpdateCheck = 1310;
 				LastUpgradeCheck = 1310;
 				TargetAttributes = {
@@ -171,7 +173,7 @@
 				};
 			};
 			buildConfigurationList = 4A426DD4275484DF00B3733B /* Build configuration list for PBXProject "Introduce" */;
-			compatibilityVersion = "Xcode 13.0";
+			compatibilityVersion = "Xcode 9.3";
 			developmentRegion = en;
 			hasScannedForEncodings = 0;
 			knownRegions = (
@@ -196,6 +198,7 @@
 				4A426DE9275484E000B3733B /* LaunchScreen.storyboard in Resources */,
 				4A426DE6275484E000B3733B /* Assets.xcassets in Resources */,
 				4A426DE4275484DF00B3733B /* Main.storyboard in Resources */,
+				419CF3B7276111DC00C73BDA /* Resources.bundle in Resources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};

+ 88 - 0
Introduce/Aarchitecture/Resources.bundle/cei.json

@@ -0,0 +1,88 @@
+{
+  "cei":{
+	"cei_param_version":"2.2.0",
+    "cei_param_device_type":"phone",
+	"cei_param_reco_mode":0,
+	"cei_param_log_level":2,
+	"cei_param_debug_path":"asr_debug",
+	"cei_param_is_debug_enable":false,
+	"cei_param_resource_path":"",
+	"cei_param_resource_is_multi_language":false,
+	"cei_param_audio_format_str":"16k16bitmono",
+	"cei_param_mcs_mode":0,
+	"cei_param_work_mode":0,
+	"cei_param_max_cache_frames":1000,
+	"cei_param_is_aec_bf_active":false,
+	"cei_param_is_agc_active":false,
+	"cei_param_is_vad_active":true,
+	"cei_param_is_kws_active":true,
+	"cei_param_is_sr_active":true
+  },
+  "asp":{
+	"asp_param_is_process_parallel":false,
+	"asp_param_is_input_debug_enable":false,
+	"asp_param_is_output_debug_enable":false,
+	"asp_param_debug_path":"asr_debug",
+	"asp_param_is_callback_enable":false,
+	"asp_param_callback_period_frames":5
+  },
+  "vad":{
+	"vad_param_is_input_debug_enable":false,
+	"vad_param_is_output_debug_enable":false,
+	"vad_param_debug_path":"asr_debug",
+	"vad_param_asleep_speech_noise_thres":-0.6,
+	"vad_param_awake_speech_noise_thres":-0.5,
+	"vad_param_asleep_max_speech_segment_time":300000,
+	"vad_param_awake_max_speech_segment_time":10000,
+	"vad_param_asleep_block_size":3,
+	"vad_param_awake_block_size":3,
+	"vad_param_front_timeout_interval":8000,
+	"vad_param_tail_timeout_interval":800,
+	"vad_param_is_detect_start":true,
+	"vad_param_is_detect_end":true
+  },
+  "kws":{
+	"kws_param_is_input_debug_enable":false,
+	"kws_param_is_output_debug_enable":false,
+	"kws_param_debug_path":"asr_debug",
+	"kws_param_is_process_parallel":false,
+	"kws_param_front_extend_frames":10,
+	"kws_param_tail_extend_frames":5,
+	"kws_param_encoder_type_str":"feat",
+	"kws_param_encoder_bitrate":16000,
+	"kws_param_encoder_complexity":2,
+	"kws_param_callback_period_ms":100,
+	"kws_param_max_frames_per_callback":25,
+	"kws_param_max_bytes_per_callback":16000
+  },
+  "sr":{
+	"sr_param_is_input_debug_enable":false,
+	"sr_param_is_output_debug_enable":false,
+	"sr_param_debug_path":"asr_debug",
+	"sr_param_is_itn_enable":true,
+	"sr_param_is_do_conf_filter":false,
+	"sr_param_is_process_parallel":true,
+	"sr_param_is_need_result":false,
+	"sr_param_is_need_voice":true,
+	"sr_param_ngram_conf_thres":65.0,
+	"sr_param_jsgf_conf_thres":65.0,
+	"sr_param_encoder_type_str":"opus",
+	"sr_param_encoder_bitrate":16000,
+	"sr_param_encoder_complexity":2,
+	"sr_param_callback_period_ms":100,
+	"sr_param_max_frames_per_callback":25,
+	"sr_param_max_bytes_per_callback":16000
+  },
+  "ou":{
+	"oss_upload_param_is_enable":false,
+	"oss_upload_param_asp_in":false,
+	"oss_upload_param_asp_out":false,
+	"oss_upload_param_vad_in":false,
+	"oss_upload_param_vad_out":false,
+	"oss_upload_param_kws_in":false,
+	"oss_upload_param_kws_susp_in":false,
+	"oss_upload_param_kws_out":false,
+	"oss_upload_param_kws_susp_out":false,
+	"oss_upload_param_sr_in":false
+  }
+}

+ 3 - 0
Introduce/Aarchitecture/Resources.bundle/copylist.txt

@@ -0,0 +1,3 @@
+tts
+cei.json
+nui.json

+ 71 - 0
Introduce/Aarchitecture/Resources.bundle/mandarin/api.cfg

@@ -0,0 +1,71 @@
+# decoder wrapper
+--AlsApi.monolist=./lexicon/phones.txt
+--AlsApi.baseLexicon=./lexicon/lexicon.bin
+--AlsApi.pronRemap=./lexicon/pinyin2phone.txt
+--AlsApi.acousticModel=./am2/am
+--AlsApi.obvModel=./am2/am
+--AlsApi.obvModelQuant=16bit
+--AlsApi.logFile=asr.log
+--AlsApi.outputFile=asr.txt
+--AlsApi.nBest=10
+--AlsApi.lazyEvaluate=false
+--AlsApi.nBlockSize=8
+--AlsApi.frameSkipNum=1
+--AlsApi.grammarDet=true
+--AlsApi.printStats=false
+--AlsApi.obvModelFormat=kaldi_nnet1
+--AlsApi.logLikelihoodCache=true
+
+# decoder core
+--Decoder.doLatticeGeneration=false
+--Decoder.useLMLA=true
+--Decoder.acousticScale=1.0f
+--Decoder.lmScale=1.0f
+--Decoder.wordEndPenalty=0.0f
+--Decoder.globalBeam=80.0f
+--Decoder.globalPreAcousticBeam=20000.0f
+--Decoder.wordEndBeam=100.0f
+--Decoder.wordBoundaryBeam=100.0f
+--Decoder.lmStateBeam=40.0f
+--Decoder.maxLmStatePerNode=5
+--Decoder.maxActiveToken=1000
+--Decoder.minActiveToken=10
+--Decoder.printStats=false
+--Decoder.memPoolReallocateSize=400
+--Decoder.latMemPoolReallocateSize=1024
+--Decoder.confidenceScale=0.99
+--Decoder.confidenceOffset=-4.24
+--Decoder.doConfidenceCalc=true
+--Decoder.subtraceLevel=1
+--Decoder.useLmCache=false
+--Decoder.useSentStartCache=true
+--Decoder.printFrmStats=false
+--Decoder.useAdaptiveBeam=true
+--Decoder.adaptiveBeamDelta=5.0f
+--Decoder.adaptiveLmStateBeamDelta=30.0f
+
+# lts
+--Lts.maxPronPerWord=16
+--Lts.addSilEachWord=true
+--Lts.addSilWholeWord=false
+--Lts.debugDumpLts=false
+
+# lmla
+--Lmla.historyLimit=0
+--Lmla.useSparseScore=true
+--Lmla.cacheSizeHighMark=500
+--Lmla.cacheSizeLowMark=500
+
+# features 
+--ContextExpansion::minus=8
+--ContextExpansion::plus=8
+--Decimate::begin-frame=0
+--output-type=FE_LOGFB+P+Dec3
+--input-type=FE_16K_16BIT_PCM
+--Waveform2Filterbank::htk-compat=false
+--Waveform2Filterbank::window-type=hamming # disable Dan's window; use the standard Hamming window
+--Waveform2Filterbank::use-energy=false    # only fbank outputs
+--Waveform2Filterbank::dither=1
+--Waveform2Filterbank::num-mel-bins=40     # 8 filters/octave, 40 filters at 16 kHz, as used by IBM
+--Waveform2Filterbank::sample-frequency=16000
+

+ 88 - 0
Introduce/Aarchitecture/Resources.bundle/mandarin/cei.json

@@ -0,0 +1,88 @@
+{
+  "cei":{
+	"cei_param_version":"2.2.0",
+    "cei_param_device_type":"car",
+	"cei_param_reco_mode":0,
+	"cei_param_log_level":2,
+	"cei_param_debug_path":"asr_debug",
+	"cei_param_is_debug_enable":false,
+	"cei_param_resource_path":"",
+	"cei_param_resource_is_multi_language":true,
+	"cei_param_audio_format_str":"16k16bitmono",
+	"cei_param_mcs_mode":0,
+	"cei_param_work_mode":0,
+	"cei_param_max_cache_frames":1000,
+	"cei_param_is_aec_bf_active":false,
+	"cei_param_is_agc_active":false,
+	"cei_param_is_vad_active":true,
+	"cei_param_is_kws_active":true,
+	"cei_param_is_sr_active":true
+  },
+  "asp":{
+	"asp_param_is_process_parallel":false,
+	"asp_param_is_input_debug_enable":false,
+	"asp_param_is_output_debug_enable":false,
+	"asp_param_debug_path":"asr_debug",
+	"asp_param_is_callback_enable":false,
+	"asp_param_callback_period_frames":5
+  },
+  "vad":{
+	"vad_param_is_input_debug_enable":false,
+	"vad_param_is_output_debug_enable":false,
+	"vad_param_debug_path":"asr_debug",
+	"vad_param_asleep_speech_noise_thres":-0.8,
+	"vad_param_awake_speech_noise_thres":-0.5,
+	"vad_param_asleep_max_speech_segment_time":300000,
+	"vad_param_awake_max_speech_segment_time":60000,
+	"vad_param_asleep_block_size":3,
+	"vad_param_awake_block_size":3,
+	"vad_param_front_timeout_interval":8000,
+	"vad_param_tail_timeout_interval":800,
+	"vad_param_is_detect_start":true,
+	"vad_param_is_detect_end":true
+  },
+  "kws":{
+	"kws_param_is_input_debug_enable":false,
+	"kws_param_is_output_debug_enable":false,
+	"kws_param_debug_path":"asr_debug",
+	"kws_param_is_process_parallel":false,
+	"kws_param_front_extend_frames":10,
+	"kws_param_tail_extend_frames":5,
+	"kws_param_encoder_type_str":"opu",
+	"kws_param_encoder_bitrate":16000,
+	"kws_param_encoder_complexity":2,
+	"kws_param_callback_period_ms":100,
+	"kws_param_max_frames_per_callback":25,
+	"kws_param_max_bytes_per_callback":16000
+  },
+  "sr":{
+	"sr_param_is_input_debug_enable":false,
+	"sr_param_is_output_debug_enable":false,
+	"sr_param_debug_path":"asr_debug",
+	"sr_param_is_itn_enable":true,
+	"sr_param_is_do_conf_filter":false,
+	"sr_param_is_process_parallel":true,
+	"sr_param_is_need_result":false,
+	"sr_param_is_need_voice":true,
+	"sr_param_ngram_conf_thres":65.0,
+	"sr_param_jsgf_conf_thres":65.0,
+	"sr_param_encoder_type_str":"opu",
+	"sr_param_encoder_bitrate":16000,
+	"sr_param_encoder_complexity":2,
+	"sr_param_callback_period_ms":100,
+	"sr_param_max_frames_per_callback":25,
+	"sr_param_max_bytes_per_callback":16000
+  },
+  "ou":{
+	"oss_upload_param_is_enable":false,
+	"oss_upload_param_asp_in":false,
+	"oss_upload_param_asp_out":false,
+	"oss_upload_param_vad_in":false,
+	"oss_upload_param_vad_out":false,
+	"oss_upload_param_kws_in":false,
+	"oss_upload_param_kws_susp_in":false,
+	"oss_upload_param_kws_out":false,
+	"oss_upload_param_kws_susp_out":false,
+	"oss_upload_param_sr_in":false
+  }
+}

BIN
Introduce/Aarchitecture/Resources.bundle/mandarin/kws.bin


+ 29 - 0
Introduce/Aarchitecture/Resources.bundle/mandarin/vad/vad.cfg

@@ -0,0 +1,29 @@
+--output-type=FE_LOGFB+P
+--input-type=FE_16K_16BIT_PCM
+--Waveform2Filterbank::sample-frequency=16000
+--Waveform2Filterbank::dither=1
+--Waveform2Filterbank::htk-compat=false
+--Waveform2Filterbank::window-type=hamming # disable Dan's window; use the standard Hamming window
+--Waveform2Filterbank::num-mel-bins=40     # 8 filters/octave, 40 filters at 16 kHz, as used by IBM
+--ContextExpansion::minus=5
+--ContextExpansion::plus=2
+--NNVAD::sample-frequency=16000
+--NNVAD::detect-mode=1
+--NNVAD::max-end-silence-time=800
+--NNVAD::max-start-silence-time=3000
+--NNVAD::voice-start-detec-flag=true
+--NNVAD::voice-end-detec-flag=true
+--NNVAD::window-size=300
+--NNVAD::sil-2-speech-time-thres=200
+--NNVAD::speech-2-sil-time-thres=150
+--NNVAD::vad-model-path=./vad/vad.mdl
+--NNVAD::vad-model-has-prior=false
+--NNVAD::speech-2-noise-ratio=1.0
+--NNVAD::do-time-extend=1
+--NNVAD::lookback-time-start-point=300
+--NNVAD::lookahead-time-end-point=300
+--NNVAD::max-single-segment-time=60000
+--NNVAD::snr-threshold=-100.0
+--NNVAD::decibel-threshold=-100.0
+--NNVAD::noise-frame-number-for-snr=100
+--NNVAD::speech-noise-thres=-0.4

BIN
Introduce/Aarchitecture/Resources.bundle/mandarin/vad/vad.mdl


+ 54 - 0
Introduce/Aarchitecture/Resources.bundle/nui.json

@@ -0,0 +1,54 @@
+{
+  "device_type":"phone",
+  "assets_version":"1.1.20190902",
+  "nui_config":{
+    "service_mode":"kModeFullCloud",
+    "log_level":"kLogLevelVerbose",
+    "enable_recorder_by_user":true,
+    "enable_dialog":false
+  },
+  "nls_config":{
+    "debug_level":4,
+    "sr_format":"opus",
+    "sample_rate":16000,
+    "dns_timeout":5000,
+    "vocab_default_weight":2
+  },
+  "audio_config":{
+    "debug":"none",
+    "16k_audio":{
+      "name":"16kmono16bit",
+      "id":0,
+      "mic":{
+        "name":"16kmono16bit",
+        "debug_heap_pollution":false,
+        "read_cnt":0,
+        "sample_rate":16000,
+        "bits_per_sample":16,
+        "channels":1,
+        "recording_interval":10,
+        "cei_frame_time_len":20,
+        "channel_mask":"kAMChannalMaskNone",
+        "format_type":"kAMDataFormatPcmInterleaved",
+        "endianness":"kAMByteOrderLittleEndian"
+      }
+    },
+    "8k_audio":{
+      "name":"8kmono16bit",
+      "id":0,
+      "mic":{
+        "name":"8kmono16bit",
+        "debug_heap_pollution":false,
+        "read_cnt":0,
+        "sample_rate":8000,
+        "bits_per_sample":16,
+        "channels":1,
+        "recording_interval":10,
+        "cei_frame_time_len":20,
+        "channel_mask":"kAMChannalMaskNone",
+        "format_type":"kAMDataFormatPcmInterleaved",
+        "endianness":"kAMByteOrderLittleEndian"
+      }
+    }
+  }
+}

BIN
Introduce/Aarchitecture/Resources.bundle/tts/languagedata_embedded.bin


+ 31 - 0
Introduce/Aarchitecture/Resources.bundle/tts/parameter.cfg

@@ -0,0 +1,31 @@
+{
+  "device_type":"car",
+  "mode_type":2,
+  "smart_engine":3,
+  "encode_type":"pcm",
+  "debug_info":{
+    "debug_level":0,
+    "debug_path":"/sdcard/tmp"
+  },
+  "common_info":{
+    "pitch_level":0,
+    "speed_level":1.0,
+    "volume":1.0,
+    "play_audio":0,
+    "callback_raw_data":1,
+    "sample_rate":16000,
+    "font_name":"xiaoyun",
+    "wait_time":2000
+  },
+  "cache":{
+    "on":true,
+    "max_cache_size":2000000,
+    "save_path":"/sdcard/mit/"
+  },
+  "font":{
+   "on":false,
+   "mode":"cmd",
+   "save_path":"/sdcard/mit"
+ }
+}
+

BIN
Introduce/Aarchitecture/Resources.bundle/tts/voices/voicefont.bin