1. 前言
场景:有3个不同的excel文件A,B,C,记录了老师的姓名、生日、电话号码等信息,需要将这些信息整合到一个excel文件中;其中,如果B,C中出现了A中相同的老师,需要以A的信息为准,舍弃B,C的信息;如果B,C中出现了A没有的老师,需要进行整合
写excel格式不统一的都是傻Ⅹ
某位老师让我帮忙处理一下excel表格,真不知道他们怎么改的,格式都tm不统一,死了得了。
2. 代码 version 1
2.1 time_trans
time_trans.py主要用作日期转换,因为有的傻叉,日期格式不统一,我服了
import datetime
def int2str(input):
# 定义Excel日期序列号
excel_date = input
# Excel和Python日期系统之间的差异(Python的1是0001-01-01,而Excel的1是1900-01-01)
# Excel错误地将1900年当作了闰年,所以需要减去1天
delta = datetime.timedelta(days=excel_date - 1)
# Excel的起始日期
start_date = datetime.datetime(1899, 12, 31)
# 计算实际日期
actual_date = start_date + delta
# 转换成字符串
date_str = actual_date.strftime('%m月%d日')
return date_str
def datetime2str(input):
output = input.strftime('%m月%d日')
return output
if __name__ == "__main__":
# 定义Excel日期序列号
excel_date = 43468
# Excel和Python日期系统之间的差异(Python的1是0001-01-01,而Excel的1是1900-01-01)
# Excel错误地将1900年当作了闰年,所以需要减去1天
delta = datetime.timedelta(days=excel_date - 1)
# Excel的起始日期
start_date = datetime.datetime(1899, 12, 31)
# 计算实际日期
actual_date = start_date + delta
# 转换成字符串
date_str = actual_date.strftime('%m月%d日')
print(date_str)
print(type(actual_date)== datetime.datetime)
print(type( datetime.datetime(2023, 6, 6, 0, 0))==datetime.datetime)
2.2 excel_proc
excel_proc.py负责主逻辑
import openpyxl
import datetime
from time_trans import datetime2str, int2str
count = 0
# 读取excel文件
workbook_main = openpyxl.load_workbook('./excel_process/main.xlsx')
workbook_sub = openpyxl.load_workbook('./excel_process/material1.xlsx')
workbook_sub2 = openpyxl.load_workbook('./excel_process/material2.xlsx')
# 获取sheet
sheet_main = workbook_main['Sheet1']
sheet_sub = workbook_sub['Sheet1']
sheet_sub2 = workbook_sub2['Sheet1']
# 获取main的值
def get_value(sheet, check_value, position):
# print("check_value:{}".format(check_value))
if type(check_value) == int:
check_value = str(check_value)
for row in sheet.rows:
cell = row[position].value
if type(cell) == int:
cell = str(cell)
if cell == check_value:
return False
else:
continue
return True
# 获取所需行的单元值
def get_row(row, position_list):
global count
list1 = []
for i in position_list:
if type(row[i].value) == datetime.datetime:
print("type is datetime:{}".format(row[i].value))
list1.append(datetime2str(row[i].value))
else:
if type(row[i].value) == int and row[i].value < 100000:
print("type is int:{}".format(row[i].value))
list1.append(int2str(row[i].value))
else:
print("type:{} value:{}".format(type(row[i].value),row[i].value))
list1.append(row[i].value)
print("append {}".format(list1))
count += 1
return list1
# process material1
for row in sheet_sub.rows:
cell = row[4].value
if cell == None:
continue
print("process material1: phone number:{}".format(cell))
if get_value(sheet_main, cell, 2) == False:
continue
else:
sheet_main.append(get_row(row,[3,1,4]))
# process matirial2
for row in sheet_sub2.rows:
cell = row[3].value
if cell == None:
continue
print("process material2: type:{}, phone number:{}".format(type(cell),cell))
if get_value(sheet_main, cell, 2) == False:
continue
else:
sheet_main.append(get_row(row,[1,2,3]))
workbook_main.save('test.xlsx')
print("change success!")
print("add {} rows".format(count))
3. 代码 version2
上一个版本太过粗糙,耦合严重,不利于后续利用,重新思考了架构后,重写了代码。
3.1 time_trans.py
将未知的时间类型转换成统一的str类型时间格式
import datetime
def unknown_date2str(input):
if type(input) == datetime.datetime:
return datetime2str(input)
elif type(input) == int and input < 100000:
return int2str(input)
else:
return input
def int2str(input):
# 定义Excel日期序列号
excel_date = input
# Excel和Python日期系统之间的差异(Python的1是0001-01-01,而Excel的1是1900-01-01)
# Excel错误地将1900年当作了闰年,所以需要减去1天
delta = datetime.timedelta(days=excel_date - 1)
# Excel的起始日期
start_date = datetime.datetime(1899, 12, 31)
# 计算实际日期
actual_date = start_date + delta
# 转换成字符串
date_str = actual_date.strftime('%m月%d日')
return date_str
def datetime2str(input):
output = input.strftime('%m月%d日')
return output
if __name__ == "__main__":
# 定义Excel日期序列号
excel_date = 43468
# Excel和Python日期系统之间的差异(Python的1是0001-01-01,而Excel的1是1900-01-01)
# Excel错误地将1900年当作了闰年,所以需要减去1天
delta = datetime.timedelta(days=excel_date - 1)
# Excel的起始日期
start_date = datetime.datetime(1899, 12, 31)
# 计算实际日期
actual_date = start_date + delta
# 转换成字符串
date_str = actual_date.strftime('%m月%d日')
print(date_str)
print(type(actual_date)== datetime.datetime)
print(type( datetime.datetime(2023, 6, 6, 0, 0))==datetime.datetime)
3.2 number_trans.py
将未知的号码类型转换成统一的str类型号码格式
def unknown_num2str(input):
if type(input) == int:
return str(input)
else:
return input
3.3 excel_proc.py
主逻辑
import openpyxl
import datetime
from time_trans import unknown_date2str
from number_trans import unknown_num2str
# 读取excel文件
workbook_main = openpyxl.load_workbook('./excel_process/main.xlsx')
workbook_sub = openpyxl.load_workbook('./excel_process/material1.xlsx')
workbook_sub2 = openpyxl.load_workbook('./excel_process/material2.xlsx')
# 获取sheet
sheet_main = workbook_main['Sheet1']
sheet_sub = workbook_sub['Sheet1']
sheet_sub2 = workbook_sub2['Sheet1']
# process phone number
def get_main_value(sheet, check_value, position):
check_value = unknown_num2str(check_value)
for row in sheet.rows:
cell = unknown_num2str(row[position].value)
if cell == check_value:
return False
else:
continue
return True
# 获取添加行的单元值
def get_row(row, position_list):
list1 = []
for i in position_list:
if type(row[i].value) == datetime.datetime or (type(row[i].value) == int and row[i].value < 100000):
list1.append(unknown_date2str(row[i].value))
else:
list1.append(unknown_num2str(row[i].value))
print("append {}".format(list1))
return list1
# process material1 get_phone_number
for row in sheet_sub.rows:
cell = row[4].value
if cell == None:
continue
# print("process material1: phone number:{}".format(cell))
if get_main_value(sheet_main, cell, 2) == False:
continue
else:
sheet_main.append(get_row(row,[3,1,4]))
# process matirial2
for row in sheet_sub2.rows:
cell = row[3].value
if cell == None:
continue
# print("process material2: type:{}, phone number:{}".format(type(cell),cell))
if get_main_value(sheet_main, cell, 2) == False:
continue
else:
sheet_main.append(get_row(row,[1,2,3]))
workbook_main.save('test1.xlsx')
print("change success!")