爬取百度翻译接口

时间:2024-04-07 14:32:25

1 直接请求 https://fanyi.baidu.com/v2transapi 这个接口会报错

爬取百度翻译接口

2 尝试切换到移动端(使用手机浏览器的 User-Agent)看看结果

https://fanyi.baidu.com/basetrans 这才是正确的接口

爬取百度翻译接口 

3 代码展示 

 

# -*- coding: utf-8 -*-

import requests
import pprint
import re

# Language-detection endpoint
testing_url = 'https://fanyi.baidu.com/langdetect'
# Translation endpoint
translate_url = 'https://fanyi.baidu.com/basetrans'
# v2transapi endpoint; not referenced by any function in the visible code
# (the notes above the code say requests to it fail)
a =  'https://fanyi.baidu.com/v2transapi'
# Request headers: an iPhone Safari User-Agent string
headers = {
    'User-Agent': "Mozilla/5.0 (iPhone; CPU iPhone OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1"
}

# Detect the language of the input text
def getLanguageType(content):
    """Ask the language-detection endpoint which language *content* is in.

    Args:
        content: Text to classify; sent as the ``query`` form field.

    Returns:
        The decoded JSON body of the response. The original author's note
        gives its shape as ``{"error": 0, "msg": "success", "lan": "en"}``;
        callers in this file read the ``lan`` key.

    Raises:
        requests.exceptions.RequestException: On network failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    data = {'query': content}
    # POST request: the text to classify travels in the form body. The timeout
    # keeps a stalled connection from blocking the caller forever.
    resp = requests.post(url=testing_url, data=data, headers=headers, timeout=10)
    # Parse as JSON rather than eval(): the body is remote, untrusted text, and
    # eval() would execute it as Python (and fail on true/false/null).
    return resp.json()

# Translate
def translate(fromlanguage, tolanguage, content):
    """Send *content* to the translation endpoint and return the parsed reply.

    Args:
        fromlanguage: Source language code, sent as the ``from`` form field.
        tolanguage: Target language code, sent as the ``to`` form field.
        content: Text to translate, sent as the ``query`` form field.

    Returns:
        The decoded JSON body of the response. Callers in this file read
        ``['trans'][0]['dst']`` from it.

    Raises:
        requests.exceptions.RequestException: On network failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    data = {
        'from': fromlanguage,
        'to': tolanguage,
        'query': content,
    }
    # The timeout keeps a stalled connection from blocking the caller forever.
    resp = requests.post(url=translate_url, data=data, headers=headers, timeout=10)

    # Parse as JSON rather than eval(): the body is remote, untrusted text, and
    # eval() would execute it as Python (and fail on true/false/null).
    return resp.json()

# Translate to English
def to_en(content):
    """Return *content* translated to English, on a best-effort basis.

    If the detected language is ``'en'`` the input is returned unchanged.
    Otherwise the text is translated, ``['trans'][0]['dst']`` is read from the
    reply, and only runs of word characters and ``,!.?`` are kept, joined by
    single spaces. Any exception raised along the way is swallowed and the
    original *content* is returned instead.
    """
    try:
        source_lang = getLanguageType(content)['lan']
        if source_lang == 'en':
            return content

        reply = translate(source_lang, 'en', content)
        first_dst = reply['trans'][0]['dst']
        tokens = re.findall(r'[\w,!.?]+', first_dst)
        # NOTE(review): as written this swaps ',' for ',' and changes nothing;
        # one of the two commas was presumably meant to be a full-width
        # comma -- confirm with the author before altering it.
        return re.sub(',', ',', ' '.join(tokens))
    except Exception:
        # Best-effort fallback: hand the input back untouched.
        return content

# Translate to Chinese
def to_zh(content):
    """Return *content* translated to Chinese, on a best-effort basis.

    If the detected language is ``'zh'`` the input is returned unchanged.
    Otherwise the text is translated and ``['trans'][0]['dst']`` from the
    reply is returned. Any exception raised along the way is swallowed and
    the original *content* is returned instead.
    """
    try:
        source_lang = getLanguageType(content)['lan']
        if source_lang == 'zh':
            return content

        # Translate into Chinese and take the first entry's 'dst' field.
        reply = translate(source_lang, 'zh', content)
        translated = reply['trans'][0]['dst']
        # NOTE(review): as written this swaps ',' for ',' and changes nothing;
        # one of the two commas was presumably meant to be a full-width
        # comma -- confirm with the author before altering it.
        return re.sub(',', ',', translated)
    except Exception:
        # Best-effort fallback: hand the input back untouched.
        return content