1+ # encoding: utf-8
2+
3+ from urllib import request
4+ import json , base64 , uuid , os
5+ import wave
6+ import pycurl
7+ import io
8+
# Baidu speech API credentials. Each value can be overridden through the
# environment; the literals remain only as backward-compatible defaults.
# NOTE(review): credentials are committed in source -- rotate them and supply
# the real values via BDA_APP_ID / BDA_API_KEY / BDA_SECRET_KEY instead.
bda_app_id = os.environ.get("BDA_APP_ID", "7972313")
bda_api_key = os.environ.get("BDA_API_KEY", "ZrjLfF5Rh7pOL66gaOmDGnXn")
bda_secret_key = os.environ.get("BDA_SECRET_KEY", "16bac9645093ca2632ebb81015ff7544")

# OAuth access token cached by get_access_token(); empty until fetched.
bda_access_token = ""
# Declared next to the token; no visible code assigns it.
bda_expires_in = ""
# Most recent transcription, written by dump_res() and returned by wav_to_text().
ret_text = ""
16+
def get_mac_address():
    """Return the hardware node id reported by ``uuid.getnode()`` as 12 hex digits."""
    node_as_uuid = uuid.UUID(int=uuid.getnode())
    hex_digits = node_as_uuid.hex
    # The node id sits in the low-order bits, i.e. the trailing 12 hex characters.
    return hex_digits[-12:]
19+
def get_access_token():
    """Fetch an OAuth access token from Baidu and cache it in ``bda_access_token``.

    Sends a client-credentials POST request to the Baidu token endpoint, stores
    the ``access_token`` field of the JSON reply in the module-level
    ``bda_access_token`` and returns it.

    Returns:
        The value of the ``access_token`` field of the reply.

    Raises:
        urllib.error.URLError: if the request cannot be completed.
        KeyError: if the reply carries no ``access_token`` field.
    """
    # Local import keeps this block independent of the file-level import list.
    from urllib.parse import urlencode

    global bda_access_token

    # Build the query from the module-level credentials instead of repeating
    # the key and secret inline, so there is a single place to change them.
    query = urlencode({
        "grant_type": "client_credentials",
        "client_id": bda_api_key,
        "client_secret": bda_secret_key,
    })
    url = "https://openapi.baidu.com/oauth/2.0/token?" + query

    req = request.Request(url, method="POST")
    # ``with`` closes the HTTP response; the timeout mirrors the 30 s limits
    # used for the recognition request so a dead endpoint cannot hang forever.
    with request.urlopen(req, timeout=30) as resp:
        json_data = json.loads(resp.read().decode('utf-8'))

    bda_access_token = json_data['access_token']
    return bda_access_token
32+
# Not referenced by any code visible in this file.
CHUNK = 1024


def get_wav_data(wav_path):
    """Read every audio frame of a WAV file.

    Args:
        wav_path: Path of the WAV file to read.

    Returns:
        ``None`` when ``wav_path`` is ``None`` or empty; otherwise a tuple
        ``(audio_data, f_len)`` holding the raw frame bytes and their length
        in bytes.
    """
    if not wav_path:
        return None

    # The context manager closes the file even if reading fails.
    with wave.open(wav_path, 'rb') as fp:
        audio_data = fp.readframes(fp.getnframes())

    # Measure the bytes actually read rather than assuming 2 bytes per frame,
    # which is only right for 16-bit mono audio.
    return audio_data, len(audio_data)
44+
def dump_res(buf):
    """Parse a recognition reply and store its first result in ``ret_text``.

    Args:
        buf: UTF-8 encoded JSON bytes. The whole JSON document must be
            present; a partial chunk cannot be decoded.

    The first entry of the reply's ``result`` list is written to the
    module-level ``ret_text``. When ``result`` is missing or empty,
    ``ret_text`` is set to an empty string instead of raising, so a stale
    transcription is never left behind. The raw bytes are printed afterwards.
    """
    global ret_text

    resp_json = json.loads(buf.decode('utf-8'))
    # A reply without recognition results has no usable 'result' entry.
    results = resp_json.get('result') or []
    ret_text = results[0] if results else ""

    print(buf)
53+
def wav_to_text(wav_path):
    """Send the audio of a WAV file to the Baidu recognition endpoint.

    Args:
        wav_path: Path of the WAV file to transcribe.

    Returns:
        ``None`` when ``wav_path`` is ``None``/empty or no access token could
        be obtained; otherwise the module-level ``ret_text`` as left by
        ``dump_res`` for this request (an empty string if the server sent no
        body).

    Raises:
        pycurl.error: if the HTTP transfer fails.
    """
    global ret_text

    if not wav_path:
        return None

    # Fetch a token lazily on first use.
    if not bda_access_token:
        get_access_token()
        if not bda_access_token:
            return None

    data, _ = get_wav_data(wav_path)
    # Declare the size of the bytes actually sent so the header, the curl
    # option and the payload always agree.
    body_len = len(data)

    url = 'http://vop.baidu.com/server_api?cuid=' + get_mac_address() + '&token=' + bda_access_token
    http_header = [
        'Content-Type: audio/pcm; rate=8000',
        'Content-Length: %d' % body_len,
    ]

    # Clear the previous transcription so a failed request cannot return it.
    ret_text = ""
    # The write callback may be invoked several times with partial chunks, so
    # collect the body first and parse it once the transfer is complete.
    response = io.BytesIO()

    c = pycurl.Curl()
    try:
        c.setopt(pycurl.URL, str(url))  # curl doesn't support unicode
        c.setopt(c.HTTPHEADER, http_header)  # must be list, not dict
        c.setopt(c.POST, 1)
        c.setopt(c.CONNECTTIMEOUT, 30)
        c.setopt(c.TIMEOUT, 30)
        c.setopt(c.WRITEFUNCTION, response.write)
        c.setopt(c.POSTFIELDS, data)
        c.setopt(c.POSTFIELDSIZE, body_len)
        c.perform()  # pycurl.perform() has no return val
    finally:
        # Always release the curl handle, even when the transfer raises.
        c.close()

    raw = response.getvalue()
    if raw:
        dump_res(raw)

    return ret_text
84+
85+
86+ # def wav_to_text(wav_path):
87+ # if wav_path is None or len(wav_path) == 0:
88+ # return None
89+ #
90+ # wav_data = get_wav_data(wav_path)
91+ # if wav_data is None:
92+ # return None
93+ #
94+ # if len(bda_access_token) == 0:
95+ # get_access_token()
96+ #
97+ # wav_base64 = base64.b64decode(wav_data)
98+ # print("%s", wav_base64)
99+ # # unicode( wav_base64, errors='ignore')
100+ # wav_len = len(wav_data)
101+ # data_dic = {'format':'wav', 'rate':8000, 'channel':1,
102+ # 'cuid':get_mac_address(), 'token':bda_access_token,
103+ # b'speech':wav_base64, 'len':wav_len}
104+ # json_data = json.dumps(data_dic).encode('utf-8')
105+ # json_len = len(json_data)
106+ #
107+ # req = request.Request('http://vop.baidu.com/server_api')
108+ # req.add_header('Content-Type', "application/json")
109+ # req.add_header("Content-Length", json_len)
110+ # resp = request.urlopen(req, data=json_data)
111+ #
112+ # resp_data = resp.read().decode('utf-8')
113+ # resp_json = json.loads(resp_data)
114+ #
115+ # return resp_json['result']
0 commit comments