首先,用 puppeteer 试过,没问题,但效率太低; 尝试破解接口, 返回为空。
from requests import Session
from time import time
from hashlib import md5
from urllib.request import urlparse
session = Session()
def tt(url = 'http://m.toutiao.com/profile/50502347096/'):
as_, cp_ = tt_encrypt()
datas = {
'page_type': '1',
'max_behot_time': '',
'uid': '50502347096',
'media_id': '50502347096',
'output': 'json',
'is_json': '1',
'count': 10,
'from': 'user_profile_app',
'version': '2',
'as': as_,
'cp': cp_,
'callback': 'jsonp5'
}
headers = {
'Referer': url,
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36'
}
article_api = 'https://www.toutiao.com/pgc/ma/'
r = session.get(article_api, headers=headers, params=datas)
print(r.url)
print(r.request.headers)
print(r.text)
def tt_encrypt():
now = int(time())
now_16 = hex(now).upper()[2:]
now_16_md5 = md5(now_16.encode('utf-8')).hexdigest().upper()
if len(now_16) == 8:
s = now_16_md5[0:5]
o = now_16_md5[-5:]
n = ''
l = ''
for i in range(5):
n += s[i] + now_16[i]
l += now_16[i+3] + o[i]
as_ = 'A1' + n + now_16[-3:]
cp_ = now_16[0:3] + l + 'E1'
else:
as_ = '479BB4B7254C150'
cp_ = '7E0AC8874BB0985'
return as_, cp_
if __name__ == '__main__':
tt()
接口返回
jsonp5({"media_id": 50502347096, "has_more": 0, "next": {"max_behot_time": 0}, "page_type": 1, "message": "success", "data": []})
1
di1012 2019-07-02 17:07:20 +08:00
有点赞的接口吗
|
2
NotNil1 2019-07-02 17:13:15 +08:00
看看下面那个推广反爬虫的,知己知彼。
|
3
exceloo 2019-07-02 17:22:25 +08:00
tt_encrypt 确认没错吗?
|
4
xuyl OP |
6
boom7 2019-07-02 17:55:21 +08:00 1
瞅了一眼, 你 tt_encrypt 的第三行出错了, 需要被 md5 的不是 now_16 ,而是 now
|
7
jaylee77 2019-07-02 18:27:32 +08:00
|
8
asmoker 2019-07-02 19:19:52 +08:00
难道是这样?两年前的方法了……
def _gen_req_params(): """ 生成请求头条 URL 必要的密钥参数 :return: """ # 响应结果 params = { 'as': '479BB4B7254C150', 'cp': '7E0AC8874BB0985' } # 当前时间戳 timestamp = int(math.floor(int(round(time.time() * 1000)) / 1000)) now = hex(timestamp) now_str_number = now[2:len(now)].upper() now_md5 = hashlib.md5(str(timestamp).encode()).hexdigest().upper() # 计算 as 和 cp 参数 if len(now_str_number) == 8: as_pre = '' cp_pre = '' first_five_char = now_md5[0:5] last_five_char = now_md5[:-5] for i in range(5): as_pre += first_five_char[i] + now_str_number[i] for j in range(5): cp_pre += now_str_number[j + 3] + last_five_char[j] as_result = "A1" + as_pre + now_str_number[-3:] cp_result = now_str_number[0:3] + cp_pre + "E1" params = { 'as': as_result, 'cp': cp_result } return params |
9
airdge 2019-07-03 07:29:40 +08:00 1
now_16_md5 = md5(str(now).encode('utf-8')).hexdigest().upper()
|