网易云歌曲爬取(包含Vip歌曲)

歌单批量下载

仅能下载非 VIP 歌曲,相当于批量点击下载按钮,没什么难度。歌曲保存在 py 文件所在目录下的 music 文件夹中,运行前需自己新建该文件夹。

# -*- coding:utf-8 -*-
import requests
from bs4 import BeautifulSoup

import os

# Browser-like request headers. Per the original author's note, the playlist
# page source cannot be fetched without them.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.80 Safari/537.36',
    'Referer': 'http://93.174.95.27',
}

# Other playlists that can be assigned to `link`:
#   http://music.163.com/playlist?id=2884035   - NetEase original songs chart
#   http://music.163.com/playlist?id=19723756  - fastest-rising chart
#   http://music.163.com/playlist?id=3778678   - hot songs chart
#   http://music.163.com/playlist?id=3779629   - new songs chart
#
# When copying a playlist URL from the browser, remove the '#' from it: the
# URL without '#' is the real address of the inner frame that carries the
# song data.
link = "https://music.163.com/playlist?id=3779629"

r = requests.get(link, headers=header, timeout=30)
html = r.content

soup = BeautifulSoup(html, "html.parser")

# The song entries of the playlist page live inside the <ul class="f-hide">
# element; every <a> inside it is one song. `limit=100` keeps at most the
# first 100 songs of the playlist.
playlist = soup.find("ul", class_="f-hide")
if playlist is None:
    # Fail with a readable message instead of an AttributeError on None.
    raise SystemExit("未找到歌单数据,请检查链接或请求头")
songs = playlist.select("a", limit=100)

# Characters that cannot appear in a file name (path separators and the
# characters Windows forbids) are replaced so that open() does not fail on
# song titles such as "A/B" or "Why?".
_illegal_chars = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})

# Create the output directory up front so the script runs on a clean checkout.
os.makedirs("music", exist_ok=True)

for i, s in enumerate(songs, start=1):
    # The numeric id follows a 9-character prefix in the link target
    # (presumably "/song?id=" -- verify against the page markup).
    song_id = s['href'][9:]
    song_name = s.text
    song_down_link = "http://music.163.com/song/media/outer/url?id=" + song_id + ".mp3"
    print("第 " + str(i) + " 首歌曲:" + song_name)
    print("正在下载...")
    response = requests.get(song_down_link, headers=header, timeout=60).content
    # os.path.join keeps the path portable instead of hard-coding 'music\\'.
    target_path = os.path.join("music", song_name.translate(_illegal_chars) + ".mp3")
    with open(target_path, 'wb') as f:
        f.write(response)
    print("下载完成!\n\r")

运行结果:

歌曲会下载到脚本所在目录下的 music 文件夹里,实测可以正常播放。

vip音乐下载

重点来了:VIP 音乐都有加密,而且加密方式经常变;截至 2021/05/25,以下代码可用。
其中 Crypto 库需要安装 pycryptodome(pip install pycryptodome;旧的 pycrypto 不包含代码中用到的 Crypto.Util.Padding),引入时模块名仍然是 Crypto。

# -*- coding: utf-8 -*-

import requests
from Crypto.Cipher import AES, PKCS1_OAEP
from Crypto.Util.Padding import pad
from Crypto.PublicKey import RSA
from Crypto.Random import get_random_bytes
import random
import base64
import json
import os


class EncryptText:
    """Build the encrypted ``params`` / ``encSecKey`` form fields for a request.

    ``resultEncrypt`` AES-CBC-encrypts the request text twice (first with the
    fixed ``nonce``, then with a freshly generated 16-character key) and
    encrypts that generated key with unpadded RSA using the hard-coded public
    exponent and modulus.
    """

    def __init__(self):
        # Alphabet the random 16-character AES key is drawn from.
        self.character = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
        # Fixed CBC initialisation vector (used as text, encoded to bytes).
        self.iv = '0102030405060708'
        # RSA public exponent and modulus, both as hexadecimal strings.
        self.public_key = '010001'
        self.modulus = '00e0b509f6259df8642dbc35662901477df22677ec152b' \
                       '5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417' \
                       '629ec4ee341f56135fccf695280104e0312ecbda92557c93' \
                       '870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b' \
                       '424d813cfe4875d3e82047b97ddef52741d546b8e289dc69' \
                       '35b3ece0462db0a22b8e7'
        # Fixed key for the first AES pass.
        self.nonce = '0CoJUm6Qyw8W8jud'

    def create16RandomBytes(self):
        """Return a random 16-character key (the original notes call this "function a").

        Characters are drawn independently (with replacement) from
        ``self.character`` using the OS-backed ``random.SystemRandom``.
        ``random.sample`` was used before; it never repeats a character, which
        needlessly shrinks the key space, and it is not a cryptographic RNG.

        :return: 16-character string made of letters and digits
        """
        secure_rng = random.SystemRandom()
        return ''.join(secure_rng.choices(self.character, k=16))

    def AESEncrypt(self, clear_text, key):
        """AES-CBC encrypt ``clear_text`` (the original notes call this "function b").

        :param clear_text: text to encrypt; it is encoded and padded to the
            AES block size with ``Crypto.Util.Padding.pad``
        :param key: AES key as text; it is encoded to bytes before use
        :return: Base64 text of the ciphertext
        """
        # Pad the data to a whole number of AES blocks.
        clear_text = pad(data_to_pad=clear_text.encode(), block_size=AES.block_size)
        key = key.encode()
        iv = self.iv.encode()
        aes = AES.new(key=key, mode=AES.MODE_CBC, iv=iv)
        cipher_text = aes.encrypt(plaintext=clear_text)
        # Convert the ciphertext bytes to a Base64 string.
        cipher_texts = base64.b64encode(cipher_text).decode()
        return cipher_texts

    def RSAEncryptOAEP(self, session_key):
        """RSA-OAEP encrypt ``session_key`` with the hard-coded public key.

        This method used to be named ``RSAEncrypt`` as well, so it was
        silently overwritten by the later three-argument definition and could
        never be called. It now has its own name; ``resultEncrypt`` does not
        use it. The original note says the output differs on every call.

        :param session_key: data to encrypt (presumably ``bytes`` -- confirm
            against the Crypto library's ``encrypt`` contract)
        :return: ciphertext as a hexadecimal string
        """
        # n and e together form the public key: (n, e).
        key = RSA.construct(rsa_components=(int(self.modulus, 16), int(self.public_key, 16)))
        public_key = key.publickey()
        rsa = PKCS1_OAEP.new(key=public_key)
        cipher_text = rsa.encrypt(message=session_key).hex()
        return cipher_text

    def RSAEncrypt(self, i, e, n):
        """Unpadded RSA encryption (the original notes call this "function c").

        The text ``i`` is reversed, encoded to bytes, read as one big
        hexadecimal integer ``M`` and encrypted as ``C = M^e mod n``.

        :param i: text to encrypt (must be non-empty)
        :param e: public exponent as a hexadecimal string
        :param n: modulus as a hexadecimal string
        :return: ``C`` as lowercase hexadecimal text without leading zeros
        """
        message = int(i[::-1].encode().hex(), 16)
        # Three-argument pow performs modular exponentiation.
        num = pow(message, int(e, 16), int(n, 16))
        return format(num, 'x')

    def resultEncrypt(self, input_text):
        """Produce the encrypted form fields (the original notes call this "function d").

        :param input_text: request payload as text
        :return: dict with ``params`` (doubly AES-encrypted payload) and
            ``encSecKey`` (RSA-encrypted random AES key)
        """
        random_key = self.create16RandomBytes()
        # First pass uses the fixed nonce, second pass the random key.
        first_pass = self.AESEncrypt(input_text, self.nonce)
        enc_text = self.AESEncrypt(first_pass, random_key)
        enc_sec_key = self.RSAEncrypt(random_key, self.public_key, self.modulus)
        return {
            'params': enc_text,
            'encSecKey': enc_sec_key
        }


class WangYiYunMusic(object):
    """Fetch search results, list them and save a chosen song to ``./music``."""

    def __init__(self):
        # Browser-like User-Agent sent with every request.
        self.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                                      'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}

    def get_html(self, url, method='GET', from_data=None):
        """Request ``url`` and return the response body as text.

        :param url: address to request
        :param method: ``'GET'`` issues a GET; any other value issues a POST
        :param from_data: form data sent with a POST request
        :return: response text decoded as UTF-8, or the string ``'请求异常'``
            when the request fails or returns an HTTP error status
        """
        try:
            # A timeout keeps the script from hanging forever on a dead
            # connection; a timeout is reported like any other request error.
            if method == 'GET':
                response = requests.get(url, headers=self.headers, timeout=30)
            else:
                response = requests.post(url, from_data, headers=self.headers, timeout=30)
            response.raise_for_status()
        except requests.RequestException as err:
            print(err)
            return '请求异常'
        response.encoding = 'utf-8'
        return response.text

    def parse_text(self, text, max_results=8):
        """Print a table of the search results and return their key fields.

        :param text: JSON text of the search response
        :param max_results: maximum number of songs to list (default 8, the
            number the original code displayed); ``None`` lists all of them
        :return: list of ``[song_id, song_name, singer]`` in display order, so
            the printed row number is the index into this list; empty when
            the response contains no songs
        :raises ValueError: if ``text`` is not valid JSON (``json.loads``)
        """
        payload = json.loads(text)
        # A search without hits may lack 'songs' (or 'result'); treat it as empty.
        songs = (payload.get('result') or {}).get('songs') or []
        row_format = '{0:{5}<5}{1:{5}<20}{2:{5}<10}{3:{5}<10}{4:{5}<20}'
        # chr(12288) is the full-width space used as the fill character.
        fill = chr(12288)
        info_list = []
        print('{:*^80}'.format('搜索结果如下'))
        print(row_format.format('序号', '歌名', '歌手', '时长(s)', '专辑', fill))
        print('{:-^84}'.format('-'))
        for index, song in enumerate(songs[:max_results]):
            song_name = song['name']
            song_id = song['id']
            # 'dt' is divided by 1000 and shown under the "时长(s)" column.
            duration = song['dt'] // 1000
            album_name = song['al']['name']
            # Only the first listed artist is shown.
            singer = song['ar'][0]['name']
            info_list.append([song_id, song_name, singer])
            print(row_format.format(index, song_name, singer, duration, album_name, fill))
        print('{:*^80}'.format('*'))
        return info_list

    def save_file(self, song_text, download_info):
        """Download the song described by ``song_text`` into ``./music``.

        :param song_text: JSON text whose ``data[0]['url']`` is the audio URL
        :param download_info: ``[song_id, song_name, singer]`` as returned by
            ``parse_text``; the file is named ``<song_name>-<singer>.mp3``
        :return: None. Nothing is written when the response carries no URL or
            the download returns an HTTP error status.
        """
        filepath = './music'
        # exist_ok avoids the check-then-create race of exists() + mkdir().
        os.makedirs(filepath, exist_ok=True)
        data = json.loads(song_text).get('data') or []
        music_url = data[0].get('url') if data else None
        if not music_url:
            # The URL can be missing/null; requests.get(None) would raise.
            print('未获取到歌曲下载地址,跳过下载')
            return
        # Replace characters that are invalid in file names (e.g. "/" or "?").
        illegal_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})
        filename = (download_info[1] + '-' + download_info[2]).translate(illegal_chars)
        response = requests.get(music_url, headers=self.headers, timeout=60)
        if not response.ok:
            # Do not save an HTTP error page as an .mp3 file.
            print('歌曲下载失败,HTTP状态码: {}'.format(response.status_code))
            return
        with open(os.path.join(filepath, filename) + '.mp3', 'wb') as f:
            f.write(response.content)
        print("下载完毕!")


if __name__ == '__main__':
    # Search endpoint (returns song ids) and player-URL endpoint (returns the
    # audio URL for a song id).
    id_url = 'https://music.163.com/weapi/cloudsearch/get/web?csrf_token='
    song_url = 'https://music.163.com/weapi/song/enhance/player/url/v1?csrf_token='

    # Search request payload; "s" is the keyword typed by the user.
    id_d = {
        "hlpretag": "<span class=\"s-fc7\">",
        "hlposttag": "</span>",
        "s": input("请输入歌名或歌手: "),
        "type": "1",
        "offset": "0",
        "total": "true",
        "limit": "30",
        "csrf_token": ""
    }

    encrypt = EncryptText()
    # Serialise with json.dumps rather than str(): str() yields a Python repr
    # (single-quoted), which is not valid JSON and changes quoting when the
    # keyword contains a quote. ensure_ascii=False keeps non-ASCII keywords
    # as-is, like the previous str() output did.
    id_from_data = encrypt.resultEncrypt(json.dumps(id_d, ensure_ascii=False))

    wyy = WangYiYunMusic()
    id_text = wyy.get_html(id_url, method='POST', from_data=id_from_data)
    info_list = wyy.parse_text(id_text)

    while True:
        raw_index = input("请输入要下载歌曲的序号(-1退出): ").strip()
        # int() instead of eval(): eval would execute arbitrary expressions
        # typed at the prompt.
        try:
            input_index = int(raw_index)
        except ValueError:
            print("请输入有效的数字序号")
            continue
        if input_index == -1:
            break
        # Reject indexes outside the listed results instead of crashing (or
        # silently wrapping around for other negative numbers).
        if not 0 <= input_index < len(info_list):
            print("序号超出范围,请重新输入")
            continue
        download_info = info_list[input_index]
        # "ids" is itself a JSON-encoded list holding the chosen song id.
        song_d = {
            "ids": json.dumps([download_info[0]]),
            "level": "standard",
            "encodeType": "aac",
            "csrf_token": ""
        }
        song_from_data = encrypt.resultEncrypt(json.dumps(song_d, ensure_ascii=False))

        song_text = wyy.get_html(song_url, method='POST', from_data=song_from_data)
        wyy.save_file(song_text, download_info)

运行结果:


歌曲会下载到脚本所在目录下的 music 文件夹里,实测可以正常播放。

最后修改:2021 年 05 月 11 日
如果觉得我的文章对你有用,请随意赞赏