0x01. 問題
由于工作需要,經(jīng)常會(huì)涉及到合并excel表格,而且我剛好在學(xué)習(xí)python,于是就用python來幫我完成這件事,我前后寫了很多版本,下面就是我的合并excel進(jìn)化史, 也是我的python學(xué)習(xí)過程,希望能對(duì)新手或者辦公有所幫助。也希望能得到大家的指點(diǎn),讓我能夠進(jìn)步。直接上代碼。
0x02. 歷程
0x01. Python剛?cè)腴T
這時(shí),對(duì)python才入門,就想著應(yīng)該是找能操作excel的庫(kù),網(wǎng)上找到openpyxl
import openpyxl
import os
'''
@author:steven
date: 2016-05-18
'''
def get_filenames(path):
filenames = []
for i in os.walk(path):
for filename in i[-1]:
full_filename = os.path.join(i[0],filename)
filenames.append(full_filename)
return filenames
def read_excel(path):
wb = openpyxl.load_workbook(path)
ws = wb.active
ncols = ws.max_column
nrows= ws.max_row
titles = []
info = {}
data = []
for i in range(1, ncols+1):
titles.append(ws.cell(row=1,column=i).value)
# print(titles)
for row in range(2,nrows+1):
temp_list = []
for col in range(1, ncols+1):
temp_list.append(ws.cell(row=row,column=col).value)
data.append(temp_list)
# print(data)
return data,titles
def save_excel(data,titles,path = None):
if path == None:
path = 'Total.xlsx'
wb = openpyxl.Workbook()
ws = wb.active
for index, title in enumerate(titles):
ws.cell(row=1,column=index+1,value=title)
for row,item in enumerate(data):
for col,value in enumerate(item):
ws.cell(row=row+2,column=col+1,value=value)
wb.save(path)
if __name__ == '__main__':
print('Program is running...')
path = r'./.'
target_path = r'./.'
data = []
titles = ''
for filename in get_filenames(path):
titles = read_excel(filename)[-1]
for item in read_excel(filename)[0]:
if item not in data:
data.append(item)
save_excel(data,titles,target_path+os.sep+'Total.xlsx')
print('Success!')
聽說有個(gè)庫(kù)叫pandas,大家都說好,到底怎么好,自己試過才知道。
# coding:utf-8
'''
@auth: Steven
@date: 2016-07-27
func: 合并目錄下的所有excel文件,去重后存入新的excel
'''
import os
import pandas as pd
def get_filenames(path):
"""獲取目錄內(nèi)所有文件名"""
filenames = []
for i in os.walk(path):
for filename in i[-1]:
full_filename = os.path.join(i[0],filename)
filenames.append(full_filename)
return filenames
def read_excel(filename):
"""讀入excel,返回dataFrame"""
df = pd.read_excel(filename, index_col=None, headers = 0, na_values=['NA'])
return df
def merge_excel(datas,index):
"""合并數(shù)據(jù),index為參考去重的列名"""
return pd.concat(datas,ignore_index=True).drop_duplicates(index)
if __name__ == '__main__':
print('Program is running...')
path = r'E:\Work'
target_path = r'E:\Work'
data = []
for filename in get_filenames(path):
data.append(read_excel(filename))
df = merge_excel(data)
df.to_excel(target_path+os.sep+'All.xlsx',index=False)
print('Success!')
里面雖然只寫了三個(gè)函數(shù),但其實(shí)可以簡(jiǎn)單點(diǎn)
#!/usr/bin/env python3.5
# -*- coding:utf-8 -*-
"""
Function:
合并目錄下的所有excel文件,去重后存入新的excel
Version: 2016-10-10
Author: Steven
"""
import os
import glob
import pandas as pd
def merge_excel(path, on):
"""合并數(shù)據(jù)"""
all_data = pd.DataFrame()
for f in glob.glob(path):
df = pd.read_excel(f)
all_data = all_data.append(df, ignore_index=True)
return all_data.drop_duplicates(on)
if __name__ == '__main__':
print('Program is running...')
path = r'./*'
target_path = r'./.'
df = merge_excel(path)
df.to_excel(os.path.join(target_path,"Total.xlsx"),index=False)
print('Success!')
其實(shí)后面我又改了很多次,后面有時(shí)間,我會(huì)把新版本再分享出來。