爬取网易云评论数据

主要是涉及到一些加解密的东西,AES加密算法等,不过问题不大,python直接上库

还有就是一点点js逆向调试的知识,对于CTF选手理解起来应该问题不大

视频链接

上代码

"""
@Author: C4ry7nk
"""

# 参数加密 data: "params=l7x8YID96yGLcnVqx42ssXOxw%2FNbE75LDHagas1MACEGYZvBnsSVeLufcb5U0Y2JBiRAQQbetQvjlkz8g0nmQhjwbxEBPtnoWOv7BPJj2M0XZN5k3VYEtUXdZ9qrdesaGTwYnadYrACfSL1H3UoLISMIrdtpBWBz6FZOyv3khJ8NaSdxm1bGd7iIJuGhyDqv5I3YQU5KAlMG3c1mJYhACYpbcrWBwnW62jAM2pEMPWC5rlb%2FyK0v0NiLgmeoOeS96%2BQHREM4kbFWkCg09lp60tbM1Yspk5BzdHTgpY8UtRg%3D&encSecKey=6ddce7f83d1cf1f5f66b83b02de68ffc68fe9d2361c06f119abb95a11191a2784ac2975b138f278d609041370c6b32c0cce7564ceebc6b5e8e32b5e4d92deb4d7602b772b344151c4abe2974cb2e2c8a9cc5c53edced9abc7f35ff9791aa2c26adce5690eaf4b1faf827ec22b38cb799d4eaebbf873dc1467d636b7405b44c8c"
# params ==> encText, encSecKey ==> encSecKey(请求体中的 params 对应加密函数返回的 encText,encSecKey 对应返回的 encSecKey)
# 找到加密函数 window.asrsea(JSON.stringify(i0x), xxxxxx) ===> window.asrsea = d
# post请求

"""
function() {
function a(a) { # a = 16
var d, e, b = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", c = "";
for (d = 0; a > d; d += 1)
e = Math.random() * b.length,
e = Math.floor(e),
# 随机数取整
c += b.charAt(e);
# 取b中字符串处于e位置的字符
return c
}
function b(a, b) {
var c = CryptoJS.enc.Utf8.parse(b)
, d = CryptoJS.enc.Utf8.parse("0102030405060708")
, e = CryptoJS.enc.Utf8.parse(a)
, f = CryptoJS.AES.encrypt(e, c, {
iv: d,
mode: CryptoJS.mode.CBC
});
return f.toString()
}
function c(a, b, c) {
var d, e;
return setMaxDigits(131),
d = new RSAKeyPair(b,"",c),
e = encryptedString(d, a)
}
function d(d, e, f, g) { # d = data , e = bsR6L(["流泪", "强"]) ==> 010001,
# f = bsR6L(Xp8h.md) ==> 00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7
var h = {} # g = bsR6L(["爱心", "女孩", "惊恐", "大笑"]) ==> 0CoJUm6Qyw8W8jud
, i = a(16); # 传入16,即生成16位随机字符串 i(用作第二次AES加密的密钥)
return h.encText = b(d, g),
h.encText = b(h.encText, i),
h.encSecKey = c(i, e, f),
h
}
function e(a, b, d, e) {
var f = {};
return f.encText = c(a + e, b, d),
f
}
window.asrsea = d,
window.ecnonasr = e
}();
"""

from Crypto.Cipher import AES
import requests
import json
from base64 import b64encode

# Comment-list endpoint of the weapi interface; the csrf_token query value is
# intentionally left empty.
url = "https://music.163.com/weapi/comment/resource/comments/get?csrf_token="

# Plaintext request parameters. They are serialized with json.dumps and
# encrypted before being sent as the "params" form field.
data = {
    "csrf_token": "",
    "cursor": "-1",
    "offset": "0",
    "orderType": "1",
    "pageNo": "1",
    "pageSize": "20",
    "rid": "R_SO_4_1454037844",
    "threadId": "R_SO_4_1454037844",
}

# The single-letter names below mirror the arguments of the JavaScript
# function d(d, e, f, g) quoted in the string literal earlier in this file.
d = data
# Passed as the first argument of RSAKeyPair in the JS; presumably the RSA
# public exponent in hex -- confirm against the RSA library.
e = "010001"
# Passed as the third argument of RSAKeyPair in the JS; presumably the RSA
# modulus in hex -- confirm against the RSA library.
f = "00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7"
# Fixed key for the first AES pass.
g = "0CoJUm6Qyw8W8jud"
# Key for the second AES pass. The JS generates it randomly with a(16); here a
# fixed 16-character value is hard-coded instead.
i = "hdsiOfBnjaQBoPVg"


def get_encseckey():
    """Return the hard-coded ``encSecKey`` form value as a 256-char hex string.

    In the JavaScript quoted earlier in this file, encSecKey is produced by
    ``c(i, e, f)``; this function does not compute anything and simply returns
    a fixed value.
    """
    # The literal is split into four 64-character chunks for readability;
    # adjacent string literals are concatenated at compile time.
    enc_sec_key = (
        "13ea329d6819dd53ca6277e72b006aa09a06dc3e048d66e4d85a95b1f0b1adef"
        "b1cfe2b3d08e322d900bb2fa1cedc055007c62f2c812766f0bee492730046783"
        "27861b3aa9b2112b9d88c698ac859590416ca1b2506e13430b373edac3ce674e"
        "6f591841fa93b82963a8fc7dee39ad2338d4d90864c9738519ad5c75c8e070bb"
    )
    return enc_sec_key


def get_enctext(data):
    """Encrypt ``data`` twice and return the text for the ``params`` field.

    Mirrors the two ``b(...)`` calls in the quoted JavaScript function ``d``:
    the first pass uses the fixed key ``g``; its Base64 output is then
    encrypted again with the key ``i``.

    Args:
        data: The plaintext to encrypt, as a string.

    Returns:
        The Base64 text produced by the second encryption pass.
    """
    return enc_params(enc_params(data, g), i)


def to_16(data):
    """Pad ``data`` so that its UTF-8 encoding is a multiple of 16 bytes.

    Between 1 and 16 padding characters are appended, each one being
    ``chr(n)`` where ``n`` is the number of characters added. Input whose
    encoded length is already a multiple of 16 receives a full extra block.

    The padding length is derived from the UTF-8 byte length, not the
    character count, because the caller encodes the padded string to UTF-8
    before encrypting it. Counting characters would leave text containing
    multi-byte characters misaligned. For pure-ASCII input both counts agree.

    Args:
        data: The text to pad.

    Returns:
        ``data`` followed by the padding characters.
    """
    block_size = 16
    byte_length = len(data.encode("utf-8"))
    padding = block_size - byte_length % block_size
    # chr(1)..chr(16) each encode to exactly one UTF-8 byte, so appending
    # ``padding`` characters adds exactly ``padding`` bytes.
    return data + chr(padding) * padding

def enc_params(data, key):
    """AES-CBC encrypt ``data`` with ``key`` and return the result as Base64.

    Corresponds to function ``b`` in the quoted JavaScript: CBC mode with the
    fixed IV ``"0102030405060708"``. The plaintext is padded by ``to_16``
    before encryption.

    Args:
        data: Plaintext string to encrypt.
        key: Key string; it is UTF-8 encoded before being handed to the cipher.

    Returns:
        The ciphertext, Base64-encoded, as a ``str``.
    """
    init_vector = "0102030405060708".encode("utf-8")
    plaintext = to_16(data).encode("utf-8")
    cipher = AES.new(key=key.encode("utf-8"), mode=AES.MODE_CBC, IV=init_vector)
    return b64encode(cipher.encrypt(plaintext)).decode("utf-8")


# Form fields expected by the endpoint: the doubly encrypted JSON parameters
# and the matching encSecKey value.
payload = {
    "params": get_enctext(json.dumps(data)),
    "encSecKey": get_encseckey(),
}

# requests applies no timeout by default, so without one the script can hang
# indefinitely if the server never answers.
resp = requests.post(url=url, data=payload, timeout=10)

print(resp.text)

代码量很少,大部分都是注释。注意:i 和 encSecKey 必须成对使用(在上面的 JS 中,encSecKey 是由 c(i, e, f) 对 i 加密得到的),本文中这两个值都是在浏览器调试时抓取的;跑的时候它们只能用一次,页面刷新之后要重新调试获取。

部分数据

这里我本来想抓热评的,但是貌似参数传错了,抓成最新评论了

爬取网易云评论数据

https://lhxhl.github.io/2022/03/24/wyy/

作者

秋秋晚

发布于

2022-03-24

更新于

2022-05-09

许可协议

评论

:D 一言句子获取中...