-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcli.py
More file actions
107 lines (91 loc) · 4.02 KB
/
cli.py
File metadata and controls
107 lines (91 loc) · 4.02 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#!/usr/local/bin/python
# Walks a directory tree, gzip-compresses each file to measure its
# compressibility, and indexes the results into Elasticsearch.
import magic
import json
from json import JSONDecoder
import os
import elasticsearch
# import bz2
import gzip
import shutil
from threading import Thread
import multiprocessing
import psutil
import decimal
# Client for the local cluster (default endpoint localhost:9200).
es = elasticsearch.Elasticsearch() # use default of localhost, port 9200
# Drop any index left over from a previous run; ignore 400/404 so a
# missing index is not an error.
es.indices.delete(index='testfsobj', ignore=[400, 404])
# Module-level scratch state shared with the worker functions below.
meta = {}
mymetapieces = []
def indexcompress(i, base, name):
    """Measure how well one file gzip-compresses and index the result.

    Compresses base/name to a temporary '<name>.compressiontest.gzip'
    file, records path, libmagic type, original size, compressed size,
    compression ratio, space saving, and file suffix into the
    'testfsobj' Elasticsearch index (document id = file path), then
    deletes the temporary file.

    i    -- worker/thread number (diagnostics only)
    base -- directory containing the file
    name -- file name within base
    """
    fullfilepath = os.path.join(base, name)
    # Read a small binary prefix for libmagic.  'rb' avoids text-decode
    # errors on binary files, and the 'with' closes the descriptor (the
    # original opened in text mode and leaked the file handle).
    with open(fullfilepath, 'rb') as sample:
        mymagicstring = magic.from_buffer(sample.read(1024))
    filesize = os.path.getsize(fullfilepath)
    meta['path'] = fullfilepath  # used as the document id below
    gzipcompressname = os.path.join(base, name + ".compressiontest.gzip")
    try:
        with open(fullfilepath, 'rb') as f_in, gzip.open(gzipcompressname, 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
        gzipcompressedsize = os.path.getsize(gzipcompressname)
        spacesavingsgzip = filesize - gzipcompressedsize
        if gzipcompressedsize > 0:
            # float() guards against Python 2 integer division, which
            # would truncate every ratio to a whole number.
            compressionratio_gzip = float(filesize) / gzipcompressedsize
            filenamesuffix = os.path.splitext(name)[1]
            mymetapieces = {'path': fullfilepath,
                            'magicident': mymagicstring,
                            'filesize': filesize,
                            'gzipcompressedsize': gzipcompressedsize,
                            'compressionratio_gzip': compressionratio_gzip,
                            'spacesavingsgzip': spacesavingsgzip,
                            'filenamesuffix': filenamesuffix}
            es.index(index='testfsobj', doc_type='message',
                     id=meta['path'], body=mymetapieces)
    finally:
        # Always remove the temporary compressed copy, even when
        # compression or indexing raised (the original left it behind).
        if os.path.exists(gzipcompressname):
            os.remove(gzipcompressname)
def cleanupmygzip(i, base, name):
    """Delete the stale '<name>.compressiontest.gzip' temp file in base.

    Tolerates the file already being gone: the caller starts several
    threads against the same path, so losing the race to another worker
    (or the file never having existed) must not crash the thread.

    i    -- worker/thread number (diagnostics only)
    base -- directory containing the temp file
    name -- original file name (without the temp suffix)
    """
    gzipcompressname = os.path.join(base, name + ".compressiontest.gzip")
    try:
        os.remove(gzipcompressname)
        # print ("Deleted ", gzipcompressname)
    except OSError:
        # Already removed by a concurrent worker, or never created.
        pass
# ---- main script ----------------------------------------------------
# Pass 1: count the real files under 'testdata' so progress can be
# reported as a percentage.  Leftover *.compressiontest.gzip temp files
# from earlier runs are excluded from the total.
filecount = 0
for base, subdirs, files in os.walk('testdata'):
    for name in files:
        if name.endswith('.compressiontest.gzip'):
            print("ignore")
        else:
            filecount = filecount + 1
# print("filecount", filecount)

# Pass 2: delete stale temp files, then compress and index every real
# file on a worker thread.
processcount = 0
for base, subdirs, files in os.walk('testdata'):
    for name in files:
        if name.endswith('.compressiontest.gzip'):
            # One cleanup thread per stale file.  (The original started
            # psutil.cpu_count() threads all deleting the SAME file,
            # which raced and raised for every thread but the winner.)
            t = Thread(target=cleanupmygzip, args=(0, base, name))
            t.start()
        else:
            processcount = processcount + 1
            # Guard the division: filecount is 0 when 'testdata' holds
            # only stale temp files, which crashed the original here.
            if filecount:
                progress = float(processcount) / filecount * 100
                # print() call form works on both Python 2 and 3,
                # unlike the original 'print x, "..."' statement.
                print("{0}% Complete".format(progress))
            t = Thread(target=indexcompress, args=(0, base, name))
            t.start()
# (Removed a dead 'if 0:' cleanup pass that duplicated the deletion
# logic above and could never execute.)