#!/usr/bin/env python
# coding=utf-8
import os
import pandas as pd
# Output root: deal_cluster() creates one sub-directory per processed source
# folder under this path and writes that folder's "features.csv" into it.
# The value is concatenated directly with the folder name, so it must keep
# its trailing slash.
saveDir = "/home/fighter/imageretrievedata/for_train/"
def deal_cluster(filePaths, save_dir=None, sample_frac=0.4, random_state=None):
    """Merge the per-index feature CSVs of one folder into a single file.

    Files are expected to be named ``<index>_features.csv`` (no header row)
    and, optionally, ``<index>_classifyType.csv`` (with an ``AlgorithmClass``
    column holding one label per feature row).  All files are looked up in
    the directory of the first entry of ``filePaths``.

    For every index found in ``filePaths``:

    * if both files exist, one feature row per distinct ``AlgorithmClass``
      value is kept;
    * if only the feature file exists, ``sample_frac`` of its rows is drawn
      with replacement;
    * if the feature file is missing, the index is skipped.

    The merged rows are written, without header or index, to
    ``<save_dir>/<source folder name>/features.csv``.

    Args:
        filePaths: Paths of the CSV files that belong to one source folder.
        save_dir: Output root directory.  Defaults to the module-level
            ``saveDir`` when ``None``.
        sample_frac: Fraction of rows drawn from unlabelled feature files.
        random_state: Optional seed forwarded to ``DataFrame.sample`` to
            make the sampling reproducible.

    Returns:
        The path of the written ``features.csv``, or ``None`` when there was
        nothing to write (empty ``filePaths`` or no feature file found).
    """
    # Nothing to do for folders without CSV files; indexing [0] would raise.
    if not filePaths:
        return None

    label_suffix = "_classifyType.csv"
    feature_suffix = "_features.csv"

    source_dir = os.path.dirname(filePaths[0])
    # Sorted so the row order of the output does not depend on set ordering.
    indices = sorted(set(os.path.basename(path).split("_")[0] for path in filePaths))

    frames = []
    for index in indices:
        feature_path = os.path.join(source_dir, index + feature_suffix)
        label_path = os.path.join(source_dir, index + label_suffix)
        if not os.path.exists(feature_path):
            continue

        features = pd.read_csv(feature_path, header=None)
        if os.path.exists(label_path):
            labels = pd.read_csv(label_path)
            # Temporarily attach the labels so that the rows can be reduced
            # to one representative per class, then drop the helper column.
            features["class"] = pd.Series(
                labels["AlgorithmClass"].values.tolist(), index=features.index
            )
            features = features.sort_values(["class"]).drop_duplicates(["class"])
            del features["class"]
        else:
            # DataFrame.sample returns a new frame; the result has to be
            # kept, otherwise the sampling has no effect.
            features = features.sample(
                frac=sample_frac, replace=True, random_state=random_state
            )
        frames.append(features)

    # None of the indices had a feature file: there is nothing to merge.
    if not frames:
        return None

    # A single concat avoids re-copying the accumulated frame per iteration.
    feature_data = pd.concat(frames)

    output_root = saveDir if save_dir is None else save_dir
    output_dir = os.path.join(output_root, os.path.basename(source_dir))
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    output_path = os.path.join(output_dir, "features.csv")
    feature_data.to_csv(output_path, header=False, index=False)
    print(output_path)
    return output_path
def copy_dirs(absdirpath, handler=None):
    """Process the CSV files of every sub-directory below ``absdirpath``.

    The tree is walked once.  For each non-hidden directory below the root
    (the root itself is not processed), the ``.csv`` files located directly
    in that directory are collected and handed to ``handler`` as one list of
    paths.  Directories whose name starts with ``.`` are neither processed
    nor descended into, and directories without CSV files are skipped.

    Args:
        absdirpath: Root directory whose sub-directories are scanned.
        handler: Callable receiving the list of CSV paths of one directory.
            Defaults to ``deal_cluster`` when ``None``.
    """
    if handler is None:
        handler = deal_cluster

    root = os.path.normpath(absdirpath)
    for dirpath, dirnames, filenames in os.walk(absdirpath):
        # Prune hidden directories in place so os.walk does not enter them.
        dirnames[:] = [name for name in dirnames if not name.startswith(".")]

        # Only sub-directories are clusters; files in the root are ignored.
        if os.path.normpath(dirpath) == root:
            continue

        # Paths are built from the directory that actually holds the file.
        csv_paths = sorted(
            os.path.join(dirpath, name)
            for name in filenames
            if name.endswith(".csv")
        )
        if csv_paths:
            handler(csv_paths)
if __name__ == "__main__":
    # Script entry point: scan the feature root and process each sub-folder.
    absdirpath = "/home/fighter/imageretrievedata/feature/"
    copy_dirs(absdirpath=absdirpath)
Created
August 14, 2016 13:50
-
-
Save guiyang882/5102808f4ffd878781acd53380cd6939 to your computer and use it in GitHub Desktop.
使用 Python 遍历文件夹中的文件,并对文件进行一些操作。
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment