"""
把当前文件夹里的多个结构相似的 Excel 表合并到一起,按"学号"列进行排序,生成 result.xlsx。

用法:把所有 Excel 表与 excel_concat.py 放到同一个文件夹,安装好 python3 与 pandas 之后,
在命令行下使用 cd 命令进入该文件夹,然后输入 python3 excel_concat.py。

配置选项:
- SHEETNAME: 指定要读取的 sheet name;如果不指定(None),则读取全部 sheets
- SKIPROWS: 读取每个 sheet 时跳过的开头行数,用于跳过表的标题之类的信息
"""
import os
from collections import OrderedDict

import pandas as pd

# Sheet(s) to read from every workbook. None makes pandas load all sheets;
# a single name/index or a list of them is also accepted.
SHEETNAME = None
# Number of leading rows to skip in every sheet (e.g. a title above the header).
SKIPROWS = 0
# Column the merged rows are sorted by. Set to None to keep the original order.
SORT_COLUMN = '学号'
# Merged workbook, written into the current working directory.
OUTPUT_FILE = 'result.xlsx'


def is_source_workbook(filename):
    """Return True if *filename* looks like an Excel workbook to be merged.

    Hidden files (leading '.'), Excel lock files (leading '~$') and the
    output workbook itself are rejected. Any extension starting with
    '.xls' (xls, xlsx, xlsm, ...) is accepted, case-insensitively.
    """
    if filename == OUTPUT_FILE:
        return False
    if filename.startswith(('.', '~$')):
        return False
    extension = os.path.splitext(filename)[1]
    return extension.lower().startswith('.xls')


def read_workbook(path):
    """Read one workbook and return a mapping of sheet name -> DataFrame.

    All cells are read as strings so that identifiers keep leading zeros.
    """
    loaded = pd.read_excel(
        path, sheet_name=SHEETNAME, skiprows=SKIPROWS, dtype=str)
    if isinstance(loaded, pd.DataFrame):
        # pandas returns a bare DataFrame (not a dict) when SHEETNAME is a
        # single name or index; wrap it so callers always get a mapping.
        return {SHEETNAME: loaded}
    return loaded


def collect_sheets():
    """Read every source workbook in the current directory.

    Prints one line per workbook (file name and the number of rows read)
    and returns an OrderedDict mapping sheet name -> list of DataFrames,
    in the order the sheets were first seen.
    """
    sheets = OrderedDict()
    for filename in sorted(os.listdir()):
        if not is_source_workbook(filename):
            continue
        print(filename.ljust(20), end=' ')
        row_total = 0
        for sheet_name, frame in read_workbook(filename).items():
            sheets.setdefault(sheet_name, []).append(frame)
            row_total += frame.shape[0]
        print(row_total)
    return sheets


def merge_frames(frames):
    """Concatenate *frames*, sort by SORT_COLUMN and blank out 'nan' cells.

    Sorting is skipped when SORT_COLUMN is None or the column is absent,
    so that one sheet without the key column does not abort the whole run.
    """
    merged = pd.concat(frames)
    if SORT_COLUMN is not None and SORT_COLUMN in merged.columns:
        # mergesort is stable: rows with equal keys keep their file order.
        merged = merged.sort_values(SORT_COLUMN, kind='mergesort')
    # Cells that were stringified as the text 'nan' are written as empty.
    return merged.replace('nan', '')


def main():
    """Merge all workbooks in the current directory into OUTPUT_FILE."""
    # Drop a stale result first so a failed run never leaves an old file.
    if os.path.exists(OUTPUT_FILE):
        os.remove(OUTPUT_FILE)

    # Read everything BEFORE creating the writer: recent pandas versions
    # create the output file as soon as ExcelWriter is constructed, and it
    # must not show up in the directory listing as an input.
    sheets = collect_sheets()
    print()
    if not sheets:
        # Saving a workbook without any sheet fails, so stop early.
        print('No Excel workbooks found, nothing to merge.')
        return

    total_rows = 0
    # The context manager saves and closes the file even if a sheet fails.
    with pd.ExcelWriter(OUTPUT_FILE) as writer:
        for sheet_name, frames in sheets.items():
            print(sheet_name, end='\t')
            merged = merge_frames(frames)
            # Sheet keys may be integers when SHEETNAME uses indexes.
            merged.to_excel(writer, sheet_name=str(sheet_name), index=False)
            print(merged.shape)
            if SORT_COLUMN is not None and SORT_COLUMN not in merged.columns:
                print('  (not sorted: no %r column)' % SORT_COLUMN)
            total_rows += merged.shape[0]
    print(total_rows)


if __name__ == '__main__':
    main()