知乎专栏 |
阿里开发团队就是个草台班子,官方提供的很多 demo 中都携带了 nuisdk-release.aar,但是没有标明版本号。不过有一种方法可以查到它的版本:
// Ask the bundled NUI SDK for its own version string and print it to logcat.
final NativeNui nui = new NativeNui();
final String sdkVersion = nui.GetVersion();
Log.d(TAG, sdkVersion);
package cn.netkiller.conference.ai.aliyun; import android.Manifest; import android.content.pm.PackageManager; import android.media.AudioFormat; import android.media.AudioRecord; import android.media.MediaRecorder; import android.os.Handler; import android.os.HandlerThread; import android.util.Log; import androidx.core.app.ActivityCompat; import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.JSONArray; import com.alibaba.fastjson.JSONException; import com.alibaba.fastjson.JSONObject; import com.alibaba.idst.nui.AsrResult; import com.alibaba.idst.nui.CommonUtils; import com.alibaba.idst.nui.Constants; import com.alibaba.idst.nui.INativeNuiCallback; import com.alibaba.idst.nui.KwsResult; import com.alibaba.idst.nui.NativeNui; import cn.netkiller.conference.MainApplication; import cn.netkiller.conference.ai.aliyun.wakup.Auth; import cn.netkiller.conference.config.Config; public class AliyunSpeechWakup { private final static int SAMPLE_RATE = 16000; private final static int WAVE_FRAM_SIZE = 20 * 2 * SAMPLE_RATE / 1000; //20ms audio for 16k/16bit/mono private static final String TAG = AliyunSpeechWakup.class.getSimpleName(); private final String defaultSdkCode = "只有使用离线功能才需要填写"; private final String defaultWakupWord = "小云小云"; private final String g_sdk_code = "software_nls_tts_offline_standard"; // 精品版为software_nls_tts_offline, 标准版为software_nls_tts_offline_standard NativeNui nui_instance = new NativeNui(); private AudioRecord mAudioRecorder = null; private Handler mHandler; INativeNuiCallback callback = new INativeNuiCallback() { @Override public void onNuiEventCallback(Constants.NuiEvent nuiEvent, int i, int i1, KwsResult kwsResult, AsrResult asrResult) { Log.i(TAG, "event=" + nuiEvent); if (nuiEvent == Constants.NuiEvent.EVENT_WUW_TRUSTED) { JSONObject jsonObject = JSON.parseObject(kwsResult.kws); String result = jsonObject.getString("word"); if (!result.isEmpty()) { Log.d(TAG, "EVENT_WUW_TRUSTED 激活词: " + result); } } else if (nuiEvent == 
Constants.NuiEvent.EVENT_ASR_RESULT) { JSONObject jsonObject = JSON.parseObject(asrResult.allResponse); JSONObject payload = jsonObject.getJSONObject("payload"); String result = payload.getString("result"); if (!result.isEmpty()) { Log.d(TAG, result); } Log.d(TAG, asrResult.asrResult); // 获得task_id,并保存音频 /// / if (mSaveAudioSwitch.isChecked()) { /// / JSONObject header = jsonObject.getJSONObject("header"); /// / curTaskId = header.getString("task_id"); /// / if (!curTaskId.isEmpty() && tmpAudioQueue.size() > 0) { /// / try { /// / mRecordingAudioFilePath = mDebugPath + "/" + "wakeupSR_task_id_" + curTaskId + ".pcm"; /// / Log.i(TAG, "save recorder data into " + mRecordingAudioFilePath); /// / mRecordingAudioFile = new FileOutputStream(mRecordingAudioFilePath, true); /// / try { /// / byte[] audioData = tmpAudioQueue.take(); /// / try { /// / mRecordingAudioFile.write(audioData); /// / mRecordingAudioFile.close(); /// / mRecordingAudioFile = null; /// / String show = "存储录音音频到 " + mRecordingAudioFilePath; /// / appendText(detailView, show); /// / ToastText(show, Toast.LENGTH_SHORT); /// / } catch (IOException e) { /// / e.printStackTrace(); /// / } /// / } catch (InterruptedException e) { /// / e.printStackTrace(); /// / } /// / } catch (IOException e) { /// / e.printStackTrace(); /// / } /// / } /// / curTaskId = ""; /// / mRecordingAudioFilePath = ""; /// / tmpAudioQueue.clear(); /// / } mHandler.post(new Runnable() { @Override public void run() { nui_instance.startDialog(Constants.VadMode.TYPE_KWS, genDialogParams()); } }); } else if (nuiEvent == Constants.NuiEvent.EVENT_ASR_PARTIAL_RESULT) { JSONObject jsonObject = JSON.parseObject(asrResult.allResponse); JSONObject payload = jsonObject.getJSONObject("payload"); String result = payload.getString("result"); if (!result.isEmpty()) { Log.d(TAG, result); } } else if (nuiEvent == Constants.NuiEvent.EVENT_ASR_ERROR || nuiEvent == Constants.NuiEvent.EVENT_MIC_ERROR) { // 
asrResult在EVENT_ASR_ERROR中为错误信息,搭配错误码resultCode和其中的task_id更易排查问题,请用户进行记录保存。 Log.d(TAG, asrResult.asrResult); // Log.d(TAG,"ERROR with " + resultCode); // final String msg_text = Utils.getMsgWithErrorCode(resultCode, "start"); // ToastText(msg_text, Toast.LENGTH_SHORT); if (nuiEvent == Constants.NuiEvent.EVENT_MIC_ERROR) { // EVENT_MIC_ERROR表示2s未传入音频数据,请检查录音相关代码、权限或录音模块是否被其他应用占用。 // 此处也可重新启动录音模块 } } else if (nuiEvent == Constants.NuiEvent.EVENT_DIALOG_EX) { /* unused */ Log.i(TAG, "dialog extra message = " + asrResult.asrResult); } } @Override public int onNuiNeedAudioData(byte[] bytes, int i) { if (mAudioRecorder == null) { return -1; } if (mAudioRecorder.getState() != AudioRecord.STATE_INITIALIZED) { Log.e(TAG, "audio recorder not init"); return -1; } // 送入SDK int audio_size = mAudioRecorder.read(bytes, 0, i); return audio_size; } @Override public void onNuiAudioStateChanged(Constants.AudioState audioState) { Log.i(TAG, "onNuiAudioStateChanged"); if (audioState == Constants.AudioState.STATE_OPEN) { Log.i(TAG, "audio recorder start"); if (mAudioRecorder != null) { mAudioRecorder.startRecording(); } Log.i(TAG, "audio recorder start done"); } else if (audioState == Constants.AudioState.STATE_CLOSE) { Log.i(TAG, "audio recorder close"); if (mAudioRecorder != null) { mAudioRecorder.release(); } } else if (audioState == Constants.AudioState.STATE_PAUSE) { Log.i(TAG, "audio recorder pause"); if (mAudioRecorder != null) { mAudioRecorder.stop(); } } } @Override public void onNuiAudioRMSChanged(float v) { // Log.i(TAG, "onNuiAudioRMSChanged vol " + val); } @Override public void onNuiVprEventCallback(Constants.NuiVprEvent nuiVprEvent) { // Log.i(TAG, "onNuiVprEventCallback event " + event); } // @Override // public void onNuiLogTrackCallback(Constants.LogLevel level, String log) { // Log.i(TAG, "onNuiLogTrackCallback log level:" + level + ", message -> " + log); // } }; private HandlerThread mHanderThread; public void startSpeechWakup() { // String version = 
nui_instance.GetVersion(); // final String sdk_ver = Utils.extractVersion(version); // Log.i(TAG, "current sdk version: " + version + " sdk_ver: " + sdk_ver); doInit(); mHanderThread = new HandlerThread("process_thread"); mHanderThread.start(); mHandler = new Handler(mHanderThread.getLooper()); boolean ret = startDialog(); } public void stopSpeechWakup() { mHandler.post(new Runnable() { @Override public void run() { long ret = nui_instance.stopDialog(); Log.i(TAG, "cancel dialog " + ret + " end"); } }); } protected void onStart() { Log.i(TAG, "onStart"); doInit(); } protected void onStop() { Log.i(TAG, "onStop"); nui_instance.release(); } private void doInit() { //这里主动调用完成SDK配置文件的拷贝, 即将nuisdk.aar中assets中资源文件拷贝到cache目录 if (CommonUtils.copyAssetsData(MainApplication.getContext())) { Log.i(TAG, "copy assets data done"); } else { Log.e(TAG, "copy assets failed"); return; } //如果需要使用外置的唤醒模型,把文件放在assets存储的cache目录,通过以下接口设置 // CommonUtils.setExternalAssetFile(this, "pack_kws.bin"); //获取工作路径, 即获得拷贝后资源文件存储的cache路径, 作为workspace // String asset_path = CommonUtils.getModelPath(MainApplication.getContext()); String asset_path = "/data/data/cn.netkiller.conference/files/asr_my"; Log.i(TAG, "use workspace " + asset_path); String mDebugPath = MainApplication.getContext().getExternalCacheDir().getAbsolutePath() + "/debug"; Utils.createDir(mDebugPath); //初始化SDK,注意用户需要在Auth.getAliYunTicket中填入相关ID信息才可以使用。 //由于唤醒功能为本地功能, 涉及鉴权, 故genInitParams中需要填写ak_id、ak_secret int ret = nui_instance.initialize(callback, genInitParams(asset_path, mDebugPath), Constants.LogLevel.LOG_LEVEL_VERBOSE, true); if (ret == Constants.NuiResultCode.SUCCESS) { Log.i(TAG, "initialize result = " + ret); } else { final String msg_text = Utils.getMsgWithErrorCode(ret, "init"); Log.d(TAG, msg_text); } } private String genParams() { String params = ""; try { JSONObject nls_config = new JSONObject(); // 是否返回中间识别结果,默认值:False。 nls_config.put("enable_intermediate_result", true); // 是否在后处理中添加标点,默认值:False。 
nls_config.put("enable_punctuation_prediction", true); //参数可根据实际业务进行配置 // nls_config.put("enable_inverse_text_normalization", true); // nls_config.put("enable_voice_detection", true); // nls_config.put("customization_id", "test_id"); // nls_config.put("vocabulary_id", "test_id"); // nls_config.put("max_start_silence", 10000); // nls_config.put("max_end_silence", 800); nls_config.put("sample_rate", SAMPLE_RATE); // nls_config.put("sr_format", "opus"); /*若文档中不包含某些参数,但是此功能支持这个参数,可以用如下万能接口设置参数*/ // JSONObject extend_config = new JSONObject(); // extend_config.put("custom_test", true); // nls_config.put("extend_config", extend_config); JSONObject parameters = new JSONObject(); parameters.put("nls_config", nls_config); parameters.put("service_type", Constants.kServiceTypeASR); //可以通过以下方式设置自定义唤醒词进行体验,如果需要更好的唤醒效果请进行唤醒词定制 //注意:动态唤醒词只有在设置了唤醒模型的前提下才可以使用 JSONArray dynamic_wuw = new JSONArray(); JSONObject wuw = new JSONObject(); wuw.put("name", "小白小白"); wuw.put("type", "main"); dynamic_wuw.add(wuw); // for (String part : allWakeupWords) { // JSONObject wuw = new JSONObject(); // wuw.put("name", part.trim()); // wuw.put("type", "main"); // dynamic_wuw.add(wuw); // } parameters.put("wuw", dynamic_wuw); //如果有HttpDns则可进行设置 // parameters.put("direct_ip", Utils.getDirectIp()); params = parameters.toString(); } catch (JSONException e) { e.printStackTrace(); } Log.d(TAG, "genParams() " + params); return params; } private boolean startDialog() { if (ActivityCompat.checkSelfPermission( MainApplication.getContext(), Manifest.permission.RECORD_AUDIO) == PackageManager.PERMISSION_GRANTED) { if (mAudioRecorder == null) { //录音初始化,录音参数中格式只支持16bit/单通道,采样率支持8K/16K //使用者请根据实际情况选择Android设备的MediaRecorder.AudioSource //录音麦克风如何选择,可查看https://developer.android.google.cn/reference/android/media/MediaRecorder.AudioSource mAudioRecorder = new AudioRecord(MediaRecorder.AudioSource.DEFAULT, SAMPLE_RATE, AudioFormat.CHANNEL_IN_MONO, AudioFormat.ENCODING_PCM_16BIT, WAVE_FRAM_SIZE * 4); } else { Log.w(TAG, 
"AudioRecord has been new ..."); } } else { Log.e(TAG, "未获得录音权限 RECORD_AUDIO permission!"); return false; } mHandler.post(new Runnable() { @Override public void run() { //设置相关识别参数,具体参考API文档 nui_instance.setParams(genParams()); //唤醒后进行识别模式 int ret = nui_instance.startDialog(Constants.VadMode.TYPE_KWS, genDialogParams()); Log.i(TAG, "start done with " + ret); if (ret != 0) { final String msg_text = Utils.getMsgWithErrorCode(ret, "start"); Log.d(TAG, msg_text); } else { Log.d(TAG, "等待唤醒 ..."); } } }); return true; } private String genInitParams(String workpath, String debugpath) { String str = ""; try { // 需要特别注意:ak_id/ak_secret/app_key/sdk_code/device_id等参数必须传入SDK // 离线语音合成sdk_code取值:精品版为software_nls_tts_offline, 标准版为software_nls_tts_offline_standard // 离线语音合成账户的sdk_code也可用于唤醒 // 鉴权信息获取参:https://help.aliyun.com/document_detail/69835.htm Log.d(TAG, "Wakup token: " + Config.Aliyun.token); //获取账号访问凭证: // 注意!此activity是唤醒+一句话识别的功能演示,包含离线唤醒功能和在线识别功能, // 所以账号方案需要mixed方案,或者创建两个Appkey分别用户唤醒功能和在线功能。 // Auth.GetTicketMethod method = Auth.GetTicketMethod.GET_STS_ACCESS_FROM_SERVER_FOR_MIXED_FEATURES; Auth.GetTicketMethod method = Auth.GetTicketMethod.GET_ACCESS_IN_CLIENT_FOR_MIXED_FEATURES; Auth.setAppKey(Config.Aliyun.Wakup.appKey); Auth.setToken(Config.Aliyun.token); Auth.setAccessKey(Config.Aliyun.accessKeyId); Auth.setAccessKeySecret(Config.Aliyun.accessKeySecret); // Auth.setStsToken(""); Auth.setSdkCode("software_nls_tts_offline_standard"); Log.i(TAG, "Use method:" + method); JSONObject object = Auth.getTicket(method); object.put("url", "wss://nls-gateway.cn-shanghai.aliyuncs.com:443/ws/v1"); //工作目录路径,SDK从该路径读取配置文件 object.put("workspace", workpath); // 必填 object.put("debug_path", debugpath); //过滤SDK内部日志通过回调送回到用户层 object.put("log_track_level", String.valueOf(Constants.LogLevel.toInt(Constants.LogLevel.LOG_LEVEL_INFO))); // FullMix = 0 // 选用此模式开启本地功能并需要进行鉴权注册 // FullCloud = 1 // FullLocal = 2 // 选用此模式开启本地功能并需要进行鉴权注册 // AsrMix = 3 // 选用此模式开启本地功能并需要进行鉴权注册 // AsrCloud = 4 // 
AsrLocal = 5 // 选用此模式开启本地功能并需要进行鉴权注册 object.put("service_mode", Constants.ModeFullMix); // 特别说明: 鉴权所用的id是由device_id,与手机内部的一些唯一码进行组合加密生成的。 // 更换手机或者更换device_id都会导致重新鉴权计费。 // 此外, 以下device_id请设置有意义且具有唯一性的id, 比如用户账号(手机号、IMEI等), // 传入相同或随机变换的device_id会导致鉴权失败或重复收费。 // Utils.getDeviceId() 并不能保证生成不变的device_id,请不要使用 // object.put("device_id", "empty_device_id"); // 必填, 推荐填入具有唯一性的id, 方便定位问题。 object.put("device_id", Config.Android.androidId); // 必填, 推荐填入具有唯一性的id, 方便定位问题。 //如果使用外置唤醒资源,可以进行设置文件路径。通过upgrade_file参数传入唤醒模型文件的绝对路径。 // 举例1:模型文件kws.bin可以放在assets,这里需要主动拷贝到App的data目录,并获得绝对路径。 String kws_bin_name = "kws.bin"; // String kws_bin_dest_name = CommonUtils.getModelPath(MainApplication.getContext()) + "/" + kws_bin_name; // CommonUtils.copyAsset(MainApplication.getContext(), kws_bin_name, kws_bin_dest_name); // 举例2:模型文件kws.bin放在/sdcard/目录,请确认读写权限 // kws_bin_name = "kws.bin"; String kws_bin_dest_name = "/data/data/cn.netkiller.conference/files/asr_my/" + kws_bin_name; object.put("upgrade_file", kws_bin_dest_name); str = object.toString(); } catch (JSONException e) { e.printStackTrace(); } // 注意! str中包含ak_id ak_secret token app_key等敏感信息, 实际产品中请勿在Log中输出这类信息! Log.i(TAG, "InsideUserContext:" + str); return str; } private String genDialogParams() { String params = ""; try { JSONObject dialog_param = new JSONObject(); // 运行过程中可以在startDialog时更新临时参数,尤其是更新过期token // 注意: 若下一轮对话不再设置参数,则继续使用初始化时传入的参数 long distance_expire_time_5m = 300; dialog_param = Auth.refreshTokenIfNeed(dialog_param, distance_expire_time_5m); // 注意: 若需要更换appkey和token,可以直接传入参数 // dialog_param.put("app_key", ""); // dialog_param.put("token", ""); params = dialog_param.toString(); } catch (JSONException e) { e.printStackTrace(); } Log.i(TAG, "dialog params: " + params); return params; } }
package cn.netkiller.conference.ai.aliyun; import android.media.AudioFormat; import android.media.AudioManager; import android.media.AudioTrack; import com.alibaba.dashscope.audio.tts.SpeechSynthesisResult; import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisParam; import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesizer; import com.alibaba.dashscope.common.ResultCallback; import java.io.FileNotFoundException; import java.util.concurrent.CountDownLatch; import cn.netkiller.conference.ai.aigc.AigcSpeechSynthesizer; public class AliyunCosyVoiceSpeechSynthesizer implements AigcSpeechSynthesizer { private static final String TAG = AliyunCosyVoiceSpeechSynthesizer.class.getSimpleName(); int sampleRate = 44100; int channelConfig = AudioFormat.CHANNEL_OUT_MONO; int audioFormat = AudioFormat.ENCODING_PCM_16BIT; int buffersize = AudioTrack.getMinBufferSize(16000, AudioFormat.CHANNEL_OUT_MONO, AudioFormat.ENCODING_MP3); private final AudioTrack audioTrack = new AudioTrack(AudioManager.STREAM_MUSIC, 16000, AudioFormat.CHANNEL_OUT_MONO, AudioFormat.ENCODING_MP3, buffersize, AudioTrack.MODE_STREAM); public synchronized void playTrack(byte[] buffer) { if (audioTrack != null && audioTrack.getPlayState() == AudioTrack.PLAYSTATE_PLAYING) { audioTrack.write(buffer, 0, buffer.length); } } @Override public void stopSpeechRecognizer() { } @Override public void startSpeechRecognizer() throws FileNotFoundException { String textToSynthesize = "想不到时间过得这么快!昨天和你视频聊天,看到你那自豪又满意的笑容,我的心里呀,就如同喝了一瓶蜜一样甜呢!真心为你开心呢!"; SpeechSynthesisParam param = SpeechSynthesisParam.builder() .model("cosyvoice-v1") .voice("loongstella") .apiKey("sk-56c7bc69e1c2407b9244e0895f603afe") .build(); System.out.println("init params done"); // Start the player audioTrack.play(); class ReactCallback extends ResultCallback<SpeechSynthesisResult> { public final CountDownLatch latch = new CountDownLatch(1); // final File file = new File("result.mp3"); // final FileOutputStream fos = new FileOutputStream(file); 
ReactCallback() throws FileNotFoundException { } @Override public void onEvent(SpeechSynthesisResult message) { // Write Audio to player if (message.getAudioFrame() != null) { // audioPlayer.write(message.getAudioFrame()); // fos.write(message.getAudioFrame().array()); playTrack(message.getAudioFrame().array()); } } @Override public void onComplete() { audioTrack.stop(); audioTrack.release(); System.out.println("synthesis onComplete!"); latch.countDown(); } @Override public void onError(Exception e) { System.out.println("synthesis onError!"); e.printStackTrace(); } public void waitForComplete() throws InterruptedException { latch.await(); } } // Create a speech synthesizer ReactCallback callback = new ReactCallback(); SpeechSynthesizer synthesizer = new SpeechSynthesizer(param, callback); // Start the synthesizer with Text System.out.printf("start synthesizer : %s \n", textToSynthesize); synthesizer.call(textToSynthesize); try { callback.waitForComplete(); } catch (InterruptedException e) { throw new RuntimeException(e); } System.out.println("[Metric] requestId: " + synthesizer.getLastRequestId() + ", first package delay ms: " + synthesizer.getFirstPackageDelay()); } @Override public void say(String text) { } }