接上面兩篇:
python操作hdfs模塊上傳文件到HDFS
python腳本---上傳本機文件到遠端(接上文)
由於與客戶尚未正式溝通,所以腳本再次發生變化,再次。。。(不能在客戶機器安裝東西,不能在客戶機器跑腳本。。。)。隨之,第三個腳本誕生。
此次腳本無需安裝任何第三方庫,對方機器只需要開啟ftp服務就可以。
背景:對方windows server每天會在對應的三個文件夾內各產生一個文件,每天凌晨1點以後需要同步今天以前的日誌到linux server。
應用:該腳本每天被定時啟動一次,每次過濾掉當天的文件,增量同步之前的日誌文件。因為日誌文件只要上傳到linux,flume就會去讀取,未拷貝結束的就會報錯,所以拷貝過程中文件名以.tmp結尾,拷貝完成後再以源文件名命名。
1. 腳本文件
syncfile.py
# -*- coding: utf-8 -*-
__author__ = 'hand'
import settings
import os
import subprocess
import logging
from ftplib import FTP
from datetime import datetime
class Filesync(object):
    """Incrementally download finished log files from a remote FTP server.

    Every sub-folder of the configured remote directory is listed, files whose
    name contains today's date (``YYYYMMDD``) are skipped, and each remaining
    file that is not yet listed in the local record file ``exitsfile.log`` is
    downloaded below the configured local directory.  While a transfer is in
    progress the local file carries a ``.tmp`` suffix; it is renamed to its
    real name only once the transfer has finished.
    """

    def __init__(self):
        """Read the connection settings, open the record file and log in.

        Raises:
            Any error raised by ``FTP.connect`` / ``FTP.login``; the record
            file is closed again before the error propagates.
        """
        self.remotehost = settings.REMOTE_HOST
        self.remoteport = settings.REMOTE_PORT
        self.username = settings.REMOTE_USER
        self.password = settings.REMOTE_PWD
        self.remotepath = settings.REMOTE_PATH
        self.localpath = settings.LOCAL_PATH
        self.ftppath = settings.FTP_PATH
        # "a+" creates the record on the first run and allows both re-reading
        # earlier entries and appending new ones.
        self.f = open("exitsfile.log", "a+")
        self.ftp = FTP()
        try:
            self.ftp.connect(self.remotehost, self.remoteport)
            self.ftp.login(self.username, self.password)
        except Exception:
            # Do not leak the record file handle when the login fails.
            self.f.close()
            raise

    @staticmethod
    def mylogger():
        """Return the ``"test"`` logger writing to a file and to the console.

        Both handlers (``downloadinfo.log`` in append mode and a stream
        handler) run at DEBUG level.  They are attached only when the logger
        has no handlers yet, so repeated calls do not duplicate log lines.
        """
        logger = logging.getLogger("test")
        logger.setLevel(logging.DEBUG)
        if logger.handlers:
            return logger
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        file_handler = logging.FileHandler("downloadinfo.log", "a")
        console = logging.StreamHandler()
        for handler in (file_handler, console):
            handler.setLevel(logging.DEBUG)
            handler.setFormatter(formatter)
            logger.addHandler(handler)
        return logger

    def get_upload_files(self, now):
        """Return the remote files that still have to be downloaded.

        Args:
            now: ``datetime`` of the current run.  Files whose name contains
                its ``%Y%m%d`` rendering are skipped.

        Returns:
            Sorted list of full remote names, each built as the remote path,
            a backslash, the folder name, a backslash and the file name.
            Names already present in the record file are left out.

        The FTP working directory is the remote directory itself (the parent
        of the log folders) when this method returns.
        """
        # Path of the remote directory relative to the FTP root.
        root_dir = self.remotepath.replace(self.ftppath, '')
        self.ftp.cwd(root_dir)
        today = now.strftime('%Y%m%d')
        remote_files = set()
        for folder in self.ftp.nlst():
            self.ftp.cwd(folder)
            for name in self.ftp.nlst():
                if today not in name:
                    remote_files.add(self.remotepath + "\\" + folder + "\\" + name)
            # Step back so the next folder name resolves from the parent again.
            self.ftp.cwd('..')

        # Rewind the record file; every line ends with "# <remote name>".
        self.f.seek(0, 0)
        transferred = set()
        for line in self.f.readlines():
            # Split on the first '#' only, so a '#' inside a file name survives.
            transferred.add(line.split("#", 1)[-1].strip())

        # Plain difference: a recorded file that has vanished from the server
        # must not be reported as pending.
        return sorted(remote_files - transferred)

    def download_files(self):
        """Download every pending file, then close the FTP session and record.

        Prints a notice when nothing is pending.  The FTP connection and the
        record file are closed even when listing or a transfer raises.
        """
        logger = self.mylogger()
        now = datetime.now()
        try:
            pending = self.get_upload_files(now)
            if not pending:
                print("沒有新文件需要下載!")
            for file_name in pending:
                self._download_one(file_name, now, logger)
        finally:
            try:
                self.ftp.quit()
            finally:
                self.f.close()

    def _download_one(self, file_name, now, logger):
        """Fetch one remote file into the local tree and record it on success."""
        # "<remotepath>\folder\file" -> "folder/file"
        relative = file_name
        if relative.startswith(self.remotepath):
            relative = relative[len(self.remotepath):]
        relative = relative.replace('\\', '/').lstrip('/')
        folder, real_file = os.path.split(relative)

        local_dir = os.path.join(self.localpath, folder)
        if not os.path.isdir(local_dir):
            os.makedirs(local_dir)
        final_name = os.path.join(local_dir, real_file)
        # Written under a ".tmp" name first so that a half-copied file is
        # never visible under its real name.
        tmp_name = final_name + '.tmp'

        # RETR takes a name relative to the FTP working directory.
        self.ftp.cwd(folder)
        logger.info("%s 開始下載!", file_name)
        with open(tmp_name, "wb") as local_file:
            reply = self.ftp.retrbinary('RETR %s' % real_file, local_file.write)
        self.ftp.cwd('..')

        # Accept any 2xx completion reply, not only ones ending in "complete.".
        if not (reply.startswith('2') or reply.endswith('complete.')):
            # Not recorded, so the file is retried on the next run.
            logger.warning("%s 下載失敗: %s", file_name, reply)
            return

        # Files from the "sessionlog" folder are stored without their first line.
        if folder == 'sessionlog':
            self._drop_first_line(tmp_name)
        os.renames(tmp_name, final_name)

        # Record only after the file is complete and carries its final name.
        self.f.write(now.strftime('%Y/%m/%d %H:%M:%S %A ') + ' # ' + file_name + '\n')
        self.f.flush()
        logger.info("%s 下載完成!", file_name)

    @staticmethod
    def _drop_first_line(path):
        """Rewrite the file at *path* without its first line."""
        with open(path, 'rb') as src:
            lines = src.readlines()
        with open(path, 'wb') as dst:
            dst.writelines(lines[1:])
if __name__ == "__main__":
    # Run one synchronisation pass when the file is executed as a script.
    fs = Filesync()
    fs.download_files()
2. 配置文件
settings.py
# Address of the FTP server
REMOTE_HOST = 'XX.XX.XX.XX'
# FTP port
REMOTE_PORT = 21
# Login user on the remote machine
REMOTE_USER = 'hand'
# Password of that user
# NOTE(review): looks like a placeholder; confirm no real credential is kept in this file
REMOTE_PWD = '123456'
# Root directory served by the FTP server
FTP_PATH = 'C:\\'
# Remote directory that holds the files to fetch
REMOTE_PATH = 'C:\\test'
# Local directory the files are copied into
LOCAL_PATH = '/storage/'