# 1. Find the website
## (1) Open the official website of the Jinan Municipal Bureau of Statistics (http://jntj.jinan.gov.cn/col/col18253/index.html)
## (2) Open the "2021 Annual Data" link
## (3) Pick any table, right-click it and choose Inspect
## (4) The data is requested with GET, the page is written in HTML, and the tables use the <table> tag
# 2. Write the code
## (1) Import the required libraries
import pandas as pd  # Parse the HTML tables and write them to Excel
import requests  # Send the HTTP request

## (2) Define the target URL
url = 'http://jntj.jinan.gov.cn/art/2022/2/7/art_18279_4745121.html'  # Target page

## (3) Define the request headers and send the request
header = {  # Request headers: present a browser User-Agent
    'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Mobile Safari/537.36',
}
# Without a timeout, requests waits indefinitely on an unresponsive server.
r = requests.get(url, headers=header, timeout=30)
# Fail fast on a 4xx/5xx reply instead of parsing an error page for tables.
r.raise_for_status()

## (4) Read the page content returned by the request
df = pd.read_html(r.content)  # One DataFrame per <table> element found
table_count = len(df)
print(' A total of... Were detected {} A tabular data !'.format(table_count))

## (5) Save the data
with pd.ExcelWriter('data.xlsx') as writer:
    # Tables 0, 1 and the last one are not needed, so export indices
    # 2 .. table_count - 2; each table goes to a sheet named after its index.
    for i, table in enumerate(df[2:-1], start=2):
        table.to_excel(writer, sheet_name=str(i), index=False, header=False)
        print(' Saved completed : The first {} A form '.format(i))
# 3. The complete code is as follows
import pandas as pd  # Parse the HTML tables and write them to Excel
import requests  # Send the HTTP request

# Complete script: download the statistics page, extract every HTML table
# from it, and save the data tables to data.xlsx, one sheet per table.
url = 'http://jntj.jinan.gov.cn/art/2022/2/7/art_18279_4745121.html'  # Target page
header = {  # Request headers: present a browser User-Agent
    'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Mobile Safari/537.36',
}
# Without a timeout, requests waits indefinitely on an unresponsive server.
r = requests.get(url, headers=header, timeout=30)
# Fail fast on a 4xx/5xx reply instead of parsing an error page for tables.
r.raise_for_status()

df = pd.read_html(r.content)  # One DataFrame per <table> element found
table_count = len(df)
print(' A total of... Were detected {} A tabular data !'.format(table_count))

with pd.ExcelWriter('data.xlsx') as writer:
    # Tables 0, 1 and the last one are not needed, so export indices
    # 2 .. table_count - 2; each table goes to a sheet named after its index.
    for i, table in enumerate(df[2:-1], start=2):
        table.to_excel(writer, sheet_name=str(i), index=False, header=False)
        print(' Saved completed : The first {} A form '.format(i))
# 4. Click Run
# 5. View the exported data in Excel
# 6. There are 21 tables in total, all exported successfully; only two are shown here as examples