How can I change this to be based on the Django framework, so that the intelligent question answering is served through a web page in the browser?
import os
import time
import logging
from collections import deque
import jieba
import jieba.posseg as pseg
from utils import get_logger, similarity
# Point jieba's default tokenizer at the current working directory for its
# temporary files (NOTE(review): this is where jieba keeps its dictionary
# cache -- confirm against the installed jieba version).
jieba.dt.tmp_dir = "./"
# Raise jieba's own logger to ERROR so its lower-level messages are dropped.
jieba.default_logger.setLevel(logging.ERROR)
# Module-wide logger built by the project's utils.get_logger helper
# (presumably writes to faqrobot.log -- verify in utils).
logger = get_logger('faqrobot', logfile="faqrobot.log")
class zhishiku(object):
    """One knowledge-base entry: one or more question phrasings and one answer.

    Attributes:
        q: list of question strings (at least one).
        a: the answer text shared by every phrasing in ``q``.
        sim: similarity score of the most recent lookup against this entry.
        q_word: per-question token collections, filled in by the loader.
        q_vec: per-question tokens kept for word-vector matching, filled in
            by the loader when an embedding model is available.
    """

    def __init__(self, q):
        """Create an entry from its first question phrasing ``q``.

        The answer starts empty and further phrasings are appended to
        ``self.q`` by the caller.
        """
        self.q = [q]
        self.a = ""
        self.sim = 0
        self.q_vec = []
        self.q_word = []

    def __str__(self):
        """Return a multi-line dump of the questions, answer and token data."""
        return (
            'q=' + str(self.q)
            + '\na=' + str(self.a)
            + '\nq_word=' + str(self.q_word)
            + '\nq_vec=' + str(self.q_vec)
        )
class FAQrobot(object):
    """FAQ answering robot backed by a plain-text question/answer file.

    The knowledge base is read from ``zhishitxt``; every question is
    tokenised with jieba, and an incoming sentence is answered with the
    answer of the entry whose question is most similar to it.
    """

    # Marker that starts a question line in the knowledge-base file.
    _QUESTION_PREFIX = '【 problem 】'
    # Similarity methods accepted by maxSimTxt.
    _SIM_TYPES = ('simple', 'simple_pos', 'vec')
    # Word2Vec model file, looked up relative to the working directory.
    _EMBEDDING_FILE = 'Word60.model'
    # Reply used when no entry is similar enough to the input.
    _FALLBACK_REPLY = (
        " I'm sorry , I don't understand what you mean . "
        "Please go to the manual customer service ."
    )

    def __init__(self, zhishitxt='test.txt', lastTxtLen=10, usedVec=False):
        """Load the knowledge base and, when available, the word vectors.

        Args:
            zhishitxt: path of the UTF-8 knowledge-base text file.
            lastTxtLen: how many of the most recent inputs are remembered.
            usedVec: stored on the instance as ``self.usedVec``; this class
                does not consult it itself.
        """
        self.lastTxt = deque([], lastTxtLen)
        self.zhishitxt = zhishitxt
        self.usedVec = usedVec
        self.reload()

    def load_qa(self):
        """Parse the knowledge-base file into ``self.zhishiku``.

        File format: blank lines and lines starting with ``#`` are
        separators/comments; a line starting with the question prefix is a
        question; any other line is answer text for the latest entry.
        Consecutive question lines are alternative phrasings of one entry.
        """
        print(' Q & a knowledge base starts loading ')
        self.zhishiku = []
        prefix = self._QUESTION_PREFIX
        with open(self.zhishitxt, encoding='utf-8') as f:
            # Kind of the previous line: 0 blank/comment, 1 answer, 2 question.
            abovetxt = 0
            for t in f:
                t = t.strip()
                if not t or t.startswith('#'):
                    abovetxt = 0
                    continue

                if t.startswith(prefix):
                    # Slice by the real prefix length so the marker is
                    # removed completely whatever its length is.
                    question = t[len(prefix):]
                    if abovetxt == 2:
                        # Another phrasing of the entry being defined.
                        self.zhishiku[-1].q.append(question)
                    else:
                        self.zhishiku.append(zhishiku(question))
                    abovetxt = 2
                    continue

                if not self.zhishiku:
                    # Answer text before any question has nothing to attach to.
                    logger.warning('answer text before any question ignored: %s', t)
                    continue

                entry = self.zhishiku[-1]
                if abovetxt == 2:
                    # First answer line directly after the question(s).
                    entry.a += t
                else:
                    # Any later answer line is joined with a newline.
                    entry.a += '\n' + t
                abovetxt = 1

        for entry in self.zhishiku:
            for question in entry.q:
                entry.q_word.append(set(jieba.cut(question)))

    def load_embedding(self):
        """Load the Word2Vec model and pre-filter each question's tokens.

        Sets ``self.vecModel`` to ``None`` when the model file is missing.
        Otherwise every entry's ``q_vec`` becomes, per question, the set of
        its tokens that are present in the model vocabulary.
        """
        # Check for the file first so gensim is only imported when needed.
        if not os.path.exists(self._EMBEDDING_FILE):
            self.vecModel = None
            return

        from gensim.models import Word2Vec

        # 60-dimensional word vectors (Word60.model, Word60.model.syn0.npy,
        # Word60.model.syn1neg.npy).
        self.vecModel = Word2Vec.load(self._EMBEDDING_FILE)
        # NOTE(review): ``index2word`` on the model is the old gensim API;
        # newer releases expose the vocabulary on ``model.wv`` -- confirm
        # against the installed gensim version.
        vocab = set(self.vecModel.index2word)  # one set, O(1) lookups
        for entry in self.zhishiku:
            entry.q_vec = [
                {word for word in words if word in vocab}
                for words in entry.q_word
            ]

    def reload(self):
        """(Re)load the knowledge base and then the word vectors."""
        self.load_qa()
        self.load_embedding()
        print(' The Q & a knowledge base is loaded ')

    def maxSimTxt(self, intxt, simCondision=0.1, simType='simple'):
        """Return the answer of the entry most similar to ``intxt``.

        Args:
            intxt: the input sentence; it is also appended to ``lastTxt``.
            simCondision: minimum similarity required to accept a match.
            simType: one of ``simple``, ``simple_pos`` or ``vec``.

        Returns:
            The matched answer, the fallback reply when nothing is similar
            enough (or the knowledge base is empty), or an error string for
            an unknown ``simType``.
        """
        self.lastTxt.append(intxt)
        if simType not in self._SIM_TYPES:
            return 'error: maxSimTxt Of simType Type does not exist : {}'.format(simType)

        # Nothing to compare against: answer as "not understood".
        if not self.zhishiku:
            return self._FALLBACK_REPLY

        # Without loaded word vectors, downgrade to the simple_pos method.
        embedding = self.vecModel
        if simType == 'vec' and not embedding:
            simType = 'simple_pos'

        # Tokenise the input once; it is the same for every entry.
        in_vec = jieba.lcut(intxt) if simType == 'simple' else pseg.lcut(intxt)

        for t in self.zhishiku:
            questions = t.q_vec if simType == 'vec' else t.q_word
            t.sim = max(
                similarity(in_vec, question, method=simType, embedding=embedding)
                for question in questions
            )

        maxSim = max(self.zhishiku, key=lambda x: x.sim)
        logger.info('maxSim=' + format(maxSim.sim, '.0%'))

        if maxSim.sim < simCondision:
            return self._FALLBACK_REPLY
        return maxSim.a

    def answer(self, intxt, simType='simple'):
        """Answer ``intxt`` using the given similarity method.

        ``simType`` is ``simple``, ``simple_pos``, ``vec`` or ``all``.
        ``all`` prints the reply of every method (for comparing them) and
        returns an empty string. Empty input also returns an empty string.
        """
        if not intxt:
            return ''

        if simType == 'all':
            for method in self._SIM_TYPES:
                outtext = 'method:\t' + self.maxSimTxt(intxt, simType=method)
                print(outtext)
            return ''

        return self.maxSimTxt(intxt, simType=simType)
if __name__ == '__main__':
    # Interactive console loop: read a question, print the robot's reply.
    robot = FAQrobot('test.txt', usedVec=False)
    while True:
        try:
            question = input(' Input :')
        except (EOFError, KeyboardInterrupt):
            # End of input or Ctrl-C ends the session without a traceback.
            break
        # simType=simple, simple_pos, vec, all
        print(' reply :' + robot.answer(question, 'simple_pos') + '\n')