您現在的位置：程式師世界 >> 編程語言 >> 更多編程語言 >> Python

python爬取某網站上的歌曲

編輯：Python

# -- coding: utf-8 --
""" 代碼實現： 1.發送請求 2.獲取數據 3.解析數據 4.保存數據 """
import requests
import os
import time
# Root directory; each search keyword gets its own sub-directory inside it.
file_path = 'musics'

# Browser-like request headers sent with the search request.
# NOTE(review): the Cookie / csrf token pair is hard-coded and presumably
# expires; refresh it from a live browser session if searches start failing.
headers = {
    'Cookie': 'Hm_lvt_cdb524f42f0ce19b169a8071123a4797=1659252846; '
              'Hm_lpvt_cdb524f42f0ce19b169a8071123a4797=1659252846; _ga=GA1.2.1319118617.1659252847; '
              '_gid=GA1.2.1887200127.1659252847; kw_token=98ZQ8KTOPZ',
    'csrf': '98ZQ8KTOPZ',
    'Host': 'www.kuwo.cn',
    'Referer': 'http://www.kuwo.cn/search/list?key=%E9%99%88%E5%A5%95%E8%BF%85',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.0.0 '
                  'Safari/537.36',
}

SEARCH_URL = 'http://www.kuwo.cn/api/www/search/searchMusicBykeyWord'
PLAY_URL = 'http://www.kuwo.cn/api/v1/www/music/playUrl'
PAGE_SIZE = 30        # records requested per search page (the "rn" parameter)
MAX_PAGES = 3         # only the first three result pages are crawled
REQUEST_TIMEOUT = 10  # seconds; without it a stalled connection hangs forever

# Characters that cannot appear in a file name on Windows (a superset of
# what POSIX file systems forbid).
ILLEGAL_FILENAME_CHARS = '\\/:*?"<>|'


def sanitize_filename(text, replacement='_'):
    """Return *text* made safe for use as a single path component.

    Path separators, the other characters Windows reserves, and ASCII
    control characters are each replaced by *replacement*. Surrounding
    whitespace and trailing dots/spaces are stripped. If nothing is left,
    *replacement* itself is returned so the result is never empty.
    """
    cleaned = ''.join(
        replacement if ch in ILLEGAL_FILENAME_CHARS or ord(ch) < 32 else ch
        for ch in str(text)
    )
    cleaned = cleaned.strip().rstrip('. ')
    return cleaned or replacement


def fetch_search_page(keyword, page_num):
    """Return the list of song records on search result page *page_num*.

    The query is passed through ``params`` so that requests URL-encodes the
    keyword. Raises ``requests.HTTPError`` on a non-2xx status and
    ``KeyError`` if the JSON body lacks ``data.list``.
    """
    query = {
        'key': keyword,
        'pn': page_num,
        'rn': PAGE_SIZE,
        'httpsStatus': 1,
        'reqId': '23599db0-10a4-11ed-8a4c-b5ed1a6dd7c8',
    }
    response = requests.get(SEARCH_URL, headers=headers, params=query,
                            timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.json()['data']['list']


def download_song(rid, target_path):
    """Resolve the audio URL for song id *rid* and save it to *target_path*.

    Raises on any HTTP error or unexpected response shape, so an error page
    is never written to disk as an ``.mp3``.
    """
    query = {
        'mid': rid,
        'type': 'music',
        'httpsStatus': 1,
        'reqId': '87b1ccc1-10a8-11ed-90ca-8da23cf7f79d',
    }
    info = requests.get(PLAY_URL, params=query, timeout=REQUEST_TIMEOUT)
    info.raise_for_status()
    audio_url = info.json()['data']['url']

    # Fetch the whole audio body before opening the file, so a failed
    # download does not leave an empty file behind.
    audio = requests.get(audio_url, timeout=REQUEST_TIMEOUT)
    audio.raise_for_status()
    with open(target_path, mode='wb') as f:
        f.write(audio.content)


def main():
    """Prompt for a keyword and download the matching songs.

    Steps: 1. send the search request, 2. read the JSON payload,
    3. extract each song record, 4. save the audio under
    ``<file_path>/<keyword>/``. One failing song is reported and skipped;
    the crawl carries on with the next one.
    """
    keyword = input('請輸入您要搜索的歌手或歌曲：')
    dir_name = os.path.join(file_path, sanitize_filename(keyword))
    os.makedirs(dir_name, exist_ok=True)

    count = success = 0
    for page_num in range(1, MAX_PAGES + 1):
        data_list = fetch_search_page(keyword, page_num)
        for data in data_list:
            count += 1
            # Read the display fields up front so the failure message below
            # can always be formatted, even for an incomplete record.
            artist = data.get('artist', '')
            name = data.get('name', '')
            target = os.path.join(
                dir_name, sanitize_filename(f'{name}--{artist}') + '.mp3')
            try:
                download_song(data['rid'], target)
            except (requests.RequestException, KeyError, TypeError,
                    ValueError, OSError) as e:
                print(e)
                print(f'第{count}首：{name}--{artist}--下載失敗')
            else:
                success += 1
                print(f'第{count}首：{name}--{artist}--下載完成')
                time.sleep(0.2)  # small pause between downloads
        # A short page means there are no further results.
        if len(data_list) < PAGE_SIZE:
            break
        time.sleep(0.5)
    print(f'下載完畢，成功下載{keyword}相關歌曲--{success}首')


if __name__ == '__main__':
    main()

爬取效果：