import datetime
class Message:
    """A message line reduced to the two values needed to reorder it.

    Pairs the millisecond timestamp of a message with the position the
    message had in the list it came from, so a list of ``Message`` objects
    can be sorted by time and then mapped back to the original lines.
    """

    # Millisecond timestamp of the message; the value to sort on.
    timestamp: int
    # Position of the message in its source list; used to look the line up
    # again once the Message objects have been put in order.
    messageId: int

    def __init__(self, timestamp, messageId):
        self.timestamp, self.messageId = timestamp, messageId
def Conversion(timestamp):
    """Turn a millisecond timestamp into a ``YYYY-MM-DD`` date string.

    ``datetime.datetime.fromtimestamp`` is called without a ``tz`` argument,
    so the date is the calendar day in the machine's local time zone.
    """
    seconds = timestamp / 1000  # fromtimestamp expects seconds, not ms
    return datetime.datetime.fromtimestamp(seconds).strftime("%Y-%m-%d")
def getZhangsan(address=r"D:\code_test\Text_processing\records\record_",
                ansAddress=r"D:\code_test\Text_processing\ans",
                fileCount=1000,
                sender=' Zhang San '):
    """Split one sender's messages into one file per calendar day.

    Reads ``<address><n>.txt`` for every ``n`` in ``range(fileCount)``. Each
    line whose characters 10-11 equal ``sender`` is appended, unchanged, to
    ``<ansAddress>/<date>.txt``, where ``<date>`` is ``Conversion`` applied
    to the 13-digit millisecond timestamp found at characters 28-40 of the
    line.

    Args:
        address: Path prefix of the record files; the file number and
            ``.txt`` are appended to it.
        ansAddress: Existing directory that receives the per-day files.
        fileCount: Number of record files to read, numbered from 0.
        sender: Text that characters 10-11 of a line must equal for the
            line to be kept.

    Returns:
        The number of lines written. The same number is printed, as before.

    Raises:
        OSError: A record file or the output directory cannot be opened.
        ValueError: Characters 28-40 of a matching line are not an integer.
    """
    # Local import keeps this function self-contained; os.path.join replaces
    # the hard-coded "\\" separator and yields the same path on Windows.
    import os

    # NOTE(review): the default `sender` is eleven characters long, but it is
    # compared against a two-character slice, so with the default no line can
    # ever match. Confirm the intended two-character name and fix the default.
    cnt = 0  # number of matching lines written so far
    for fileId in range(fileCount):
        print(fileId)
        with open(address + str(fileId) + '.txt', 'r', encoding="utf-8") as f:
            for line in f:
                if line[10:12] != sender:
                    continue
                timeStr = Conversion(int(line[28:41]))
                # Opened with the platform default encoding, as before, so
                # the per-day files stay readable by code that opens them
                # without an explicit encoding. The `with` guarantees the
                # handle is closed even if the write fails.
                with open(os.path.join(ansAddress, timeStr + '.txt'), 'a') as fzs:
                    fzs.write(line)
                cnt += 1
    print(cnt)
    return cnt
def sortMessageByTime(ansAddress=r"D:\code_test\Text_processing\ans",
                      datePrefix="2022-04-",
                      days=range(8, 16)):
    """Write a timestamp-ordered copy of each per-day message file.

    For every ``day`` in ``days`` the file ``<ansAddress>/<date>.txt`` is
    read, where ``<date>`` is ``datePrefix`` followed by the two-digit day.
    Its lines are ordered by the 13-digit millisecond timestamp at
    characters 28-40 and appended to
    ``<ansAddress>_sort_by_timestamp/<date>.txt``.

    Lines with equal timestamps keep the order they had in the input file.
    The output is opened in append mode, so running the function twice adds
    a second sorted copy to the same file.

    Args:
        ansAddress: Directory holding the per-day files. The output
            directory is this path with ``_sort_by_timestamp`` appended and
            must already exist.
        datePrefix: Year-and-month part of the file names.
        days: Day-of-month numbers to process, in order.

    Raises:
        OSError: An input file or the output directory cannot be opened.
        ValueError: Characters 28-40 of a line are not an integer.
    """
    # Local import keeps this function self-contained; os.path.join replaces
    # the hard-coded "\\" separator and yields the same path on Windows.
    import os

    sortedDir = ansAddress + "_sort_by_timestamp"
    for day in days:
        dateStr = f"{datePrefix}{day:02d}"
        print(dateStr)
        fileName = dateStr + '.txt'

        with open(os.path.join(ansAddress, fileName), 'r') as f:
            lines = f.readlines()

        # list.sort is stable, so sorting the lines directly by their
        # timestamp gives the same order as sorting (timestamp, index) pairs
        # and looking each line up again by index.
        lines.sort(key=lambda line: int(line[28:41]))

        with open(os.path.join(sortedDir, fileName), 'a') as fzs:
            fzs.writelines(lines)
# Run the two stages in order: first split the sender's messages into per-day
# files, then write a timestamp-sorted copy of each day's file.
for stage in (getZhangsan, sortMessageByTime):
    stage()