程序師世界是廣大編程愛好者互助、分享、學習的平台,程序師世界有你更精彩!
首頁
編程語言
C語言|JAVA編程
Python編程
網頁編程
ASP編程|PHP編程
JSP編程
數據庫知識
MYSQL數據庫|SqlServer數據庫
Oracle數據庫|DB2數據庫
您現在的位置: 程式師世界 >> 編程語言 >> 更多編程語言 >> Python

基於python爬取全國2822所高校在各省,近三年的錄取分數線

編輯:Python

最近全國高考結束,考生都在等分當中。鑒於自己之前一直有個想法:爬取各高校的信息,方便考生選擇,因此完成了以下代碼,爬取了全國2822所高校(包括本科和高職院校)在各省的分數線。

下圖是各高校在湖北省的,經過高校軟科排名排序後的近3年錄取分數情況:

完整的數據下載地址
鏈接:https://pan.baidu.com/s/1uohDZQk2SPSjI0htZBJd1g
提取碼:z1db

數據中分數欄為空白的部分,說明該學校在該省不招生。

部分代碼如下,未優化…

from ast import Str
from time import sleep
import requests
import json
import csv
from sqlalchemy import null
def save_data(s, data):
    """Append one row to the CSV file ``./<s>.csv``.

    Args:
        s: Base name of the CSV file (without the ``.csv`` extension); the
            file is created in the current working directory if missing.
        data: Iterable of cell values forming a single CSV row.
    """
    # newline='' lets the csv module control line endings itself.
    # The ``with`` block closes the file, so no explicit close() is needed.
    with open('./' + s + '.csv', encoding='UTF-8', mode='a', newline='') as f:
        csv.writer(f).writerow(data)
# Endpoints of the gaokao.cn static data service used by this script.
SCHOOL_LIST_URL = 'https://static-data.gaokao.cn/www/2.0/school/name.json'
SCHOOL_INFO_URL = 'https://static-data.gaokao.cn/www/2.0/school/{}/info.json'

# Seconds to wait for a response before giving up on a request.
REQUEST_TIMEOUT = 30

# Province table: "name" is the numeric code matched against the keys of a
# school's ``pro_type_min`` mapping, "value" is the province name, which is
# also used as the output CSV file name.
province_id = [
    {"name": 11, "value": "北京"},
    {"name": 12, "value": "天津"},
    {"name": 13, "value": "河北"},
    {"name": 14, "value": "山西"},
    {"name": 15, "value": "內蒙古"},
    {"name": 21, "value": "遼寧"},
    {"name": 22, "value": "吉林"},
    {"name": 23, "value": "黑龍江"},
    {"name": 31, "value": "上海"},
    {"name": 32, "value": "江蘇"},
    {"name": 33, "value": "浙江"},
    {"name": 34, "value": "安徽"},
    {"name": 35, "value": "福建"},
    {"name": 36, "value": "江西"},
    {"name": 37, "value": "山東"},
    {"name": 41, "value": "河南"},
    {"name": 42, "value": "湖北"},
    {"name": 43, "value": "湖南"},
    {"name": 44, "value": "廣東"},
    {"name": 45, "value": "廣西"},
    {"name": 46, "value": "海南"},
    {"name": 50, "value": "重慶"},
    {"name": 51, "value": "四川"},
    {"name": 52, "value": "貴州"},
    {"name": 53, "value": "雲南"},
    {"name": 54, "value": "西藏"},
    {"name": 61, "value": "陝西"},
    {"name": 62, "value": "甘肅"},
    {"name": 63, "value": "青海"},
    {"name": 64, "value": "寧夏"},
    {"name": 65, "value": "新疆"},
]

# Column titles written as the first row of every province CSV.
CSV_HEADER = [
    '名稱', '省', '市', '縣', '地址', '介紹', '985', '211', '軟科排名',
    '學校類型', '學校屬性', '特色專業',
    "2021分數線", "2020分數線", "2019分數線",
]

# Years exported, in the same order as the last three header columns.
SCORE_YEARS = ("2021", "2020", "2019")

# Labels for the subject-category codes that appear as keys of an entry's
# ``type`` mapping; codes not listed here are ignored.
SUBJECT_LABELS = {
    '2073': '物理類',
    '2074': '歷史類',
    '1': '理科',
    '2': '文科',
    '3': '綜合類',
}


def _fetch_json(url):
    """Download *url* and return the decoded JSON document.

    Raises:
        requests.RequestException: on network errors, timeouts or HTTP
            error status codes.
        ValueError: if the response body is not valid JSON.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return json.loads(response.text)


def _format_scores(type_scores):
    """Render one year's scores as ``' <label>:<score> <label>:<score> '``.

    Categories are emitted in the mapping's own order. The result always
    starts with a single space, so an entry without any known category
    yields ``' '``.
    """
    parts = [' ']
    if isinstance(type_scores, dict):
        for code, score in type_scores.items():
            label = SUBJECT_LABELS.get(str(code))
            if label is not None:
                # f-string formatting also copes with non-string scores.
                parts.append(f'{label}:{score} ')
    return ''.join(parts)


def _scores_by_year(pro_type_min, province_code):
    """Return ``{year: formatted scores}`` for every year in SCORE_YEARS.

    Years without data for the province map to ``''``. Entries for any
    other year are ignored rather than being stored under 2019.
    """
    scores = dict.fromkeys(SCORE_YEARS, '')
    # NOTE(review): presumably the API can send a non-dict (e.g. an empty
    # list) when a school has no score data - confirm against live data.
    if not isinstance(pro_type_min, dict):
        return scores
    for code, entries in pro_type_min.items():
        if int(code) != province_code:
            continue
        for entry in entries:
            # Compare as text so both 2021 and "2021" are recognised.
            year = str(entry['year'])
            if year in scores:
                scores[year] = _format_scores(entry['type'])
    return scores


def _yes_no(flag):
    """Map the API flag value 1 / "1" to "是" and anything else to "否"."""
    return "是" if str(flag) == "1" else "否"


def _school_fields(info):
    """Build the province-independent CSV cells (first 12 columns)."""
    rank = info["ruanke_rank"]
    if str(rank) == '0':
        # A rank of 0 is exported as an empty cell.
        rank = ''
    # Join the names into one readable cell instead of a Python list repr.
    specials = '、'.join(
        str(item["special_name"])
        for item in (info["special"] or [])
        if item["special_name"]
    )
    return (
        info["name"],
        info["province_name"],
        info["city_name"],
        info["town_name"],
        info["address"],
        info["content"],
        _yes_no(info["f985"]),
        _yes_no(info["f211"]),
        rank,
        info["type_name"],
        info["school_nature_name"],
        specials,
    )


def main():
    """Download every school's details and write one CSV per province.

    Each ``./<province>.csv`` is recreated with a header row, then every
    school is fetched once and a row is appended to every province file.
    The last three columns hold that school's scores for the province.
    """
    print("#########"
          " 版權所有:殷宗敏 & 數據接口來源-https://www.gaokao.cn/school/search & 在此表示感謝!"
          "##########")

    schools = _fetch_json(SCHOOL_LIST_URL)["data"]

    # Start every province file afresh; utf-8-sig writes a BOM first.
    for province in province_id:
        with open('./' + province["value"] + '.csv', encoding='utf-8-sig',
                  mode='w', newline='') as f:
            csv.writer(f).writerow(CSV_HEADER)

    for school in schools:
        school_name = school['name']
        print("正在下載" + school_name)
        try:
            payload = _fetch_json(SCHOOL_INFO_URL.format(school['school_id']))
        except (requests.RequestException, ValueError) as err:
            # One unreachable or malformed school must not abort the crawl.
            print(f"Skipped {school_name}: {err}")
            continue
        if not payload:
            continue

        info = payload["data"]
        base = _school_fields(info)
        # Fetch once per school, then fan out to every province file.
        for province in province_id:
            scores = _scores_by_year(info["pro_type_min"], province["name"])
            row = base + tuple(scores[year] for year in SCORE_YEARS)
            save_data(province["value"], row)


if __name__ == "__main__":
    main()

  1. 上一篇文章:
  2. 下一篇文章:
Copyright © 程式師世界 All Rights Reserved