#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#pylint: disable=W1401
"""
Created on Thu Nov 9 15:12:30 2017
@author: lu
"""
import jieba
import pandas as pd
from gensim import corpora, models
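"""
Each stage depends on the data file produced by the previous stage, so every
stage saves its output under the data/ directory.
programmer_1 --> extract the data
programmer_2 --> remove duplicate records
programmer_3 --> strip unwanted text with regular expressions
programmer_4 --> segment words with jieba
programmer_5 --> semantic analysis after segmentation: LDA model analysis of positive and negative sentiment
"""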
def programmer_1(inputfile="data/huizong.csv",
                 outputfile="data/meidi_jd.txt",
                 brand=u"美的"):
    """Extract the review text of a single brand from the summary CSV.

    Reads ``inputfile`` as a UTF-8 CSV, keeps the rows whose ``品牌`` (brand)
    column equals ``brand``, and writes only their ``評論`` (review) column
    to ``outputfile``, one record per row, with no header and no index.

    Calling ``programmer_1()`` with no arguments uses the original
    hard-coded paths and brand.

    Args:
        inputfile: Path of the CSV to read; it must contain the columns
            ``品牌`` and ``評論``.
        outputfile: Path of the text file to write (overwritten if present).
        brand: Value of the ``品牌`` column to keep.
    """
    data = pd.read_csv(inputfile, encoding="utf-8")
    # Pick the matching rows and the review column in one .loc lookup rather
    # than chaining two indexing operations; the one-element column list
    # keeps the result a DataFrame, as the chained form did.
    reviews = data.loc[data[u"品牌"] == brand, [u"評論"]]
    reviews.to_csv(outputfile, index=False, header=False, encoding="utf-8")
def programmer_2():
inputfile = "data/meidi_jd.txt"
outputfile = "data/meidi_jd_process_1.txt"
data = pd