# This code is for learning and exchange only; do not use it for illegal purposes. The encryption algorithm is not provided. For reference only.
import configparser
import json
import os
import subprocess
import threading
import time
import urllib.parse
from queue import Empty, Queue

import pandas as pds
import requests
import xlrd
import xlwt
'''
This code is for learning only; do not misuse it.
'''
# HTTP headers sent with every API request. The User-Agent string identifies
# the client as MicroMessenger running on an iPhone.
headers = {
    "Content-Type": "application/x-www-form-urlencoded",
    "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/7.0.15(0x17000f31) NetType/WIFI Language/zh_CN",
}

# Header row for the result workbooks.
excelTitle = [" date ", " Product quotation ", " Shop "]

# Directory that is scanned for keyword workbooks.
excelPwd = os.getcwd() + "/excels/"

# Directory that receives the result workbooks.
# NOTE(review): the original used `gexcelPwd` without ever defining it, which
# raised NameError at import time. A directory separate from `excelPwd` is
# assumed here so that results are not re-read as keyword input on the next
# run; the actual directory name is a guess -- confirm.
gexcelPwd = os.getcwd() + "/gexcels/"
os.makedirs(gexcelPwd, exist_ok=True)

# Load conf.ini from the working directory. ConfigParser.read() silently skips
# files it cannot open and returns the list of files it did parse, so a
# missing file has to be detected from the return value, not from an
# exception.
cf = configparser.ConfigParser()
try:
    loadedFiles = cf.read(os.getcwd() + "/conf.ini", encoding="utf-8-sig")
except (configparser.Error, UnicodeError, OSError) as e:
    # The file exists but could not be decoded or parsed.
    print(e)
    loadedFiles = []
if not loadedFiles:
    print(" Program directory does not exist conf.ini The configuration file ~")
    # SystemExit is raised directly because the `exit` helper is injected by
    # the `site` module and is not guaranteed to exist in every interpreter.
    raise SystemExit(0)
def getConf(sec, key):
    """Return the value of option *key* in section *sec* of conf.ini.

    The value is looked up in the module-level parser ``cf``. When the section
    or option is missing (or the value cannot be interpolated) the problem is
    printed and the program terminates with exit status 0.

    Args:
        sec: Section name.
        key: Option name inside that section.

    Returns:
        The option value as a string.

    Raises:
        SystemExit: If the option cannot be read from the configuration.
    """
    try:
        return cf.get(sec, key)
    except configparser.Error as e:
        # Only configuration problems are reported here; unrelated programming
        # errors are allowed to surface instead of being mislabelled.
        print(e)
        print(" The following configuration is not available :" + sec + " - " + key)
        # `exit` is a site-module convenience; raise SystemExit directly.
        raise SystemExit(0)
# Number of worker threads, taken from option `threadNums` in section
# `app-sys` of conf.ini. A value that is not an integer, or that is zero or
# negative, falls back to a single thread.
try:
    threadNums = max(1, int(getConf("app-sys", "threadNums")))
except Exception:
    threadNums = 1
def postHtml(url, data, retries=3, timeout=15):
    """POST form data to *url* and return the decoded JSON response.

    The request is sent with the module-level ``headers``. A failed attempt
    (network error, timeout, response that is not valid UTF-8 JSON) is
    reported and retried.

    Args:
        url: Target URL.
        data: Form fields to send in the request body.
        retries: Maximum number of attempts (default 3, as before).
        timeout: Seconds to wait for the server per attempt. Without a
            timeout a stalled connection would block the calling thread
            forever.

    Returns:
        The parsed JSON document, or None when every attempt failed.
    """
    for attempt in range(retries):
        try:
            resp = requests.post(url, data=data, headers=headers, timeout=timeout)
            return json.loads(resp.content.decode("utf-8"))
        except (requests.RequestException, ValueError) as e:
            # ValueError covers both JSON and UTF-8 decoding failures.
            print("POST attempt " + str(attempt + 1) + " of " + str(retries) + " failed: " + str(e))
    return None
def getSign(page, retries=3, timeout=30):
    """Return the request signature for *page* as text.

    Runs ``node encrypt.js <page>`` in the current working directory and
    returns what the script writes to standard output, decoded as UTF-8 with
    surrounding whitespace removed. The original returned the raw bytes of the
    pipe, which cannot be concatenated into the request URL, and never closed
    the pipe.

    Args:
        page: Page number passed to the script as its only argument.
        retries: Maximum number of attempts.
        timeout: Seconds to wait for the script per attempt.

    Returns:
        The signature string, or "" when no attempt produced one.
    """
    # The command is passed as an argument list, so no shell is involved.
    command = ["node", "encrypt.js", str(page)]
    for attempt in range(retries):
        try:
            completed = subprocess.run(command, stdout=subprocess.PIPE, timeout=timeout)
        except (OSError, subprocess.SubprocessError) as e:
            # node is not installed, could not be started, or timed out.
            print("Signature attempt " + str(attempt + 1) + " failed: " + str(e))
            continue
        sign = completed.stdout.decode("utf-8", errors="replace").strip()
        if completed.returncode == 0 and sign:
            return sign
    return ""
def getCurrentTime():
    """Return the current local time formatted as ``YYYY-MM-DD_HH-MM-SS``."""
    # strftime() formats the current local time when no time tuple is given.
    stamp = time.strftime('%Y-%m-%d_%H-%M-%S')
    return stamp
class mtSpider(threading.Thread):
    """Worker thread that crawls search results for queued keywords.

    Each worker takes jobs (dicts with the keys ``keyword`` and
    ``crawlerNum``) from a shared queue, pages through the search API for the
    keyword and appends one row per returned product to its own .xls file.
    """

    def __init__(self, keywordQueue, index, *args, **kwargs):
        """Create a worker.

        Args:
            keywordQueue: Queue of job dicts shared between all workers.
            index: Worker number; it becomes part of the output file name.
            *args, **kwargs: Passed through to ``threading.Thread``.
        """
        super(mtSpider, self).__init__(*args, **kwargs)
        self.keywordQueue = keywordQueue
        self.excelPath = gexcelPwd + "data_" + getCurrentTime() + "_" + str(index) + ".xls"

    def initExcel(self):
        """Create the output workbook containing only the header row.

        ``run`` called this method but the class did not define it, so every
        worker died with AttributeError before crawling anything.
        """
        workbook = xlwt.Workbook()
        sheet = workbook.add_sheet("Sheet1", cell_overwrite_ok=True)
        for col, title in enumerate(excelTitle):
            sheet.write(0, col, title)
        workbook.save(self.excelPath)

    def writeExcel(self, data):
        """Append *data* as a new row at the end of the output workbook.

        The first sheet of the existing file is read with xlrd, its cell
        values are copied into a fresh xlwt workbook, the new row is added
        and the file is saved over the old one. Only cell values of the first
        sheet are carried over. The original relied on an undefined ``copy``
        helper, so every call failed inside a swallow-all ``except``.

        Failures are reported and otherwise ignored so that one bad row does
        not stop the crawl.

        Args:
            data: Sequence of cell values; each is written as ``str(value)``.
        """
        print("-" * 10)
        print(data)
        print("-" * 10)
        try:
            old_sheet = xlrd.open_workbook(self.excelPath).sheet_by_index(0)
            row_count = old_sheet.nrows
            new_workbook = xlwt.Workbook()
            new_sheet = new_workbook.add_sheet(old_sheet.name, cell_overwrite_ok=True)
            for row in range(row_count):
                for col, value in enumerate(old_sheet.row_values(row)):
                    new_sheet.write(row, col, value)
            for col, value in enumerate(data):
                try:
                    new_sheet.write(row_count, col, str(value))
                except Exception as e:
                    # A cell that cannot be written is skipped; the rest of
                    # the row is still stored.
                    print("Could not write column " + str(col) + ": " + str(e))
                    continue
            new_workbook.save(self.excelPath)
        except Exception as e:
            print("Could not append row to " + self.excelPath + ": " + str(e))

    def getGoodsList(self, keyword, page):
        """Fetch one page of search results for *keyword*.

        Args:
            keyword: Search term.
            page: Page number; it is only used to compute the signature.

        Returns:
            The ``Data`` entry of the JSON response, or None when the request
            failed or the response has no such entry.
        """
        sign = getSign(page)
        if isinstance(sign, bytes):
            # A signature delivered as raw process output cannot be joined
            # into the URL; decode it and drop the trailing newline.
            sign = sign.decode("utf-8", errors="replace").strip()
        # NOTE(review): `OpenID` is read as a module-level name, but no
        # definition is visible in this file -- confirm where it is set.
        params = {
            "sign": sign,
            "word": keyword,
            "OpenID": OpenID,
        }
        # urlencode() escapes characters such as '&', '#' or '+' that would
        # otherwise corrupt the query string.
        url = "https://www.mtzh.ltd/api/all/AllProduct/PostSearchNew?" + urllib.parse.urlencode(params)
        res = postHtml(url, params)
        if isinstance(res, dict):
            return res.get('Data')
        return None

    @staticmethod
    def _buildRow(goods):
        """Turn one product record into the list of cell values for its row.

        A field that is missing from *goods* is replaced by "".
        """
        def field(key):
            try:
                return goods[key]
            except (KeyError, IndexError, TypeError):
                return ""

        # NOTE(review): the original never filled ShopNumber, and the row has
        # two cells while `excelTitle` has three columns (no value for
        # " Product quotation "). The source field names are not visible in
        # this file, so both are left as they were -- confirm against the API.
        shopNumber = ""
        return [field('ModifyDate'), shopNumber + "\n" + field('ShopName')]

    def _crawlKeyword(self, keyword, crawlerNum):
        """Store up to *crawlerNum* products for *keyword*, page by page."""
        currNums = 0
        page = 1
        while True:
            goodsList = self.getGoodsList(keyword, page)
            if not goodsList:
                # No (more) results, or the request failed.
                return
            for goods in goodsList:
                try:
                    row = self._buildRow(goods)
                except Exception as e:
                    # e.g. a non-text shop name; skip this product only.
                    print("Skipping product: " + str(e))
                    continue
                self.writeExcel(row)
                currNums += 1
                if currNums >= crawlerNum:
                    return
            page += 1
            # Pause between pages.
            time.sleep(5)

    def run(self):
        """Create the output workbook, then process jobs until the queue is empty."""
        self.initExcel()
        while True:
            try:
                # get_nowait() avoids the race where another worker takes the
                # last job between an empty() check and a blocking get().
                job = self.keywordQueue.get_nowait()
            except Empty:
                break
            # The original passed an undefined name `keywords` here.
            self._crawlKeyword(job['keyword'], job['crawlerNum'])
def getKeywordsQueue(directory=None):
    """Build the job queue from the keyword workbooks in *directory*.

    Every file in the directory is opened with ``pandas.read_excel``. Each
    row that has a non-null value in the `` key word `` column becomes one
    job ``{"keyword": ..., "crawlerNum": ...}``. ``crawlerNum`` comes from the
    `` Collection quantity `` column and falls back to 1000 when that value is
    missing, not an integer, or not positive.

    The original passed ``encoding="utf-8"`` to ``read_excel``, which current
    pandas rejects, and swallowed the resulting error, so the queue always
    came back empty.

    Args:
        directory: Directory to scan; defaults to the module-level
            ``excelPwd``.

    Returns:
        A ``Queue`` of job dicts; empty when nothing could be read.
    """
    if directory is None:
        directory = excelPwd
    keywordQueue = Queue(0)
    try:
        names = os.listdir(directory)
    except OSError as e:
        print("Could not list " + str(directory) + ": " + str(e))
        return keywordQueue
    for name in names:
        tpath = os.path.join(directory, name)
        try:
            df = pds.read_excel(tpath)
        except Exception as e:
            # Anything that is not a readable workbook is skipped; the error
            # types depend on the installed Excel engine.
            print("Could not read " + tpath + ": " + str(e))
            continue
        for _, rowData in df.iterrows():
            try:
                keywords = rowData[' key word ']
                if pds.isnull(keywords):
                    continue
            except (KeyError, TypeError, ValueError) as e:
                # Column missing or not a single value: skip this row.
                print("Skipping row in " + tpath + ": " + str(e))
                continue
            try:
                crawlerNum = int(rowData[' Collection quantity '])
            except (KeyError, TypeError, ValueError, OverflowError):
                crawlerNum = 1000
            if crawlerNum <= 0:
                crawlerNum = 1000
            keywordQueue.put({
                "keyword": keywords, "crawlerNum": crawlerNum})
    return keywordQueue
def main():
    """Load the keyword jobs and crawl them with ``threadNums`` worker threads.

    Prints a hint and returns when no keyword could be read.
    """
    # The original read `keywordQueue` without ever creating it (NameError).
    keywordQueue = getKeywordsQueue()
    if keywordQueue.qsize() <= 0:
        print(" No... Was read excel key word , Please check excel Standard or not !")
        return
    workers = []
    for i in range(threadNums):
        worker = mtSpider(keywordQueue, i)
        worker.start()
        workers.append(worker)
    # Wait explicitly for all workers to finish.
    for worker in workers:
        worker.join()


if __name__ == '__main__':
    main()