# 导入包 import os import numpy as np import pandas as pd from numpy import NaN from pandas import DataFrame,Series import matplotlib.pyplot as plt from wordcloud import WordCloud plt.rcParams['font.sans-serif']=['SimHei'] #正常显示中文标签 # 绘图魔术命令 %matplotlib inline
# 原始数据 data_users_sourse=dataOpen(file_users,0,True) # 重命名指定列 data_users_sourse=data_users_sourse.rename(columns = {"用户ID": "会员ID"}) # 删除无用列 del data_users_sourse['用户自编码'] del data_users_sourse['组织代码'] del data_users_sourse['密码'] del data_users_sourse['QQ'] del data_users_sourse['微信'] del data_users_sourse['电子邮箱'] del data_users_sourse['备注'] del data_users_sourse['创建人'] del data_users_sourse['修改人'] del data_users_sourse['修改日期'] del data_users_sourse['姓名'] del data_users_sourse['算法ID'] del data_users_sourse['算法名称'] del data_users_sourse['创造日期'] # 删除管理员,老师无用行 #data_users_sourse.drop([0,1],axis = 0,inplace = True) # 通过~取反,选取不含管理员,老师的行 data_users_sourse=data_users_sourse[~data_users_sourse['账号'].isin(['超级管理员','老师'])] # 删除存在NaN的无用行 data_users_sourse.dropna(axis=0, how='any',inplace=True) # 处理后数据 data_users=data_users_sourse
4.3.打开订单表(返回data_info)
1 2 3 4 5 6 7 8 9 10 11 12 13 14
# 原始数据 data_info_sourse=dataOpen(file_meal_order_info,0,True) # 重命名指定列 data_info_sourse=data_info_sourse.rename(columns = {"订单状态:0:未结算;1:结算;2:已锁单": "订单状态"}) data_info_sourse=data_info_sourse.rename(columns = {"门店ID(org_info.id)": "门店ID"}) # 删除无用列 del data_info_sourse['订单状态'] del data_info_sourse['门店ID'] del data_info_sourse['桌子ID'] del data_info_sourse['电话'] # 删除存在NaN的无用行 data_info_sourse.dropna(axis=0, how='any',inplace=True) # 处理后数据 data_info=data_info_sourse
# 选择读第几页 defdataSheet(sheetindex:int): # 原始数据 data_detail_sourse=dataOpen(file_meal_order_detail,sheetindex,True) # 清除菜品名称列的\n\r和空格 col_dishes_name=data_detail_sourse[detail_sm[5]] data_detail_sourse[detail_sm[5]]=[col.replace('\n', '').replace('\r', '').replace(' ', '') for col in col_dishes_name] # 删除无用列 del data_detail_sourse['订单详情ID'] del data_detail_sourse['会员ID'] del data_detail_sourse['菜品ID'] del data_detail_sourse['是否为添加菜'] del data_detail_sourse['添加价格'] del data_detail_sourse['图片'] return data_detail_sourse # 读所有页 df0=dataSheet(0) df1=dataSheet(1) df2=dataSheet(2) data_detail_sourse_all=pd.concat([df0,df1,df2],axis=0,ignore_index=True)# 纵向合并 # 删除存在NaN的无用行 data_detail_sourse_all.dropna(axis=0, how='any',inplace=True) # 处理后数据 data_detail=data_detail_sourse_all
4.5.所有表合并(返回data_all)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
# 根据订单ID合并订单表和订单详情表 data_all1=pd.merge(data_info,data_detail,how='outer',on=['订单ID']) # 根据订会员ID合并用户信息表 data_all=pd.merge(data_all1,data_users,how='inner',on=['会员ID']) # 删除无用列 del data_all['订单ID'] del data_all['会员ID'] del data_all['学号'] del data_all['桌子名称'] del data_all['账号'] del data_all['电话'] del data_all['第一次登录'] del data_all['最后一次登录'] del data_all['付费金额'] # 删除存在NaN的无用行 data_all.dropna(axis=0, how='any',inplace=True) # 打印 file_data_all=r'D:/DataSet/餐饮企业订单信息/data_all.csv' data_all.to_csv(file_data_all,index=False)