Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
.directory
*.pyc
__pycache__/
data/
clusters/
21 changes: 21 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
A super simple example of image clustering with image hashes ([wavelet hashing](https://fullstackml.com/wavelet-image-hash-in-python-3504fdd282b5) in particular) and DBSCAN.

install deps: `pip3 install -r requirements.txt`

to run:
- store all images in `static/img/` directory
- empty the `data` directory if you wish to re-run the clustering from scratch
- run `python3 server.py --server` to do an interactive run and decide on the cluster distance and min-samples count.
- run without the `--server` argument to move the images into the `clusters` directory

```
usage: server.py [-h] [-s] [--cluster distance] [--min-samples count]

Image Cluster

optional arguments:
-h, --help show this help message and exit
-s, --server run flask server (default: False)
--cluster distance cluster distance (default: 1)
--min-samples count minimum number of samples in a cluster (default: 1)
```
11 changes: 10 additions & 1 deletion cluster.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import imagehash
import numpy as np
from PIL import Image
Expand Down Expand Up @@ -38,4 +39,12 @@ def cluster(mat, fnames, eps, min_samples):
clusters = defaultdict(list)
for i, lbl in enumerate(labels):
clusters[lbl].append(fnames[i])
return clusters
return clusters

def create_directory(directory):
    """Ensure that ``directory`` exists, creating missing parents as needed.

    Args:
        directory: Path of the directory to create.

    Returns:
        True if ``directory`` is an existing directory when the call
        finishes (freshly created or already present), False otherwise,
        e.g. when the path is occupied by a regular file.
    """
    try:
        # exist_ok=True turns the common "already there" case into a plain
        # success instead of routing it through the exception handler.
        os.makedirs(directory, exist_ok=True)
    except OSError:
        # Report what is actually on disk: any failure that still leaves a
        # directory at this path counts as success for the caller.
        return os.path.isdir(directory)
    return True
2 changes: 0 additions & 2 deletions data/.gitignore

This file was deleted.

9 changes: 0 additions & 9 deletions readme.md

This file was deleted.

7 changes: 4 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
ImageHash==3.4
scikit-learn==0.18
numpy==1.11.1
ImageHash
scikit-learn
numpy
flask
88 changes: 63 additions & 25 deletions server.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,38 @@
import json
import shutil
import numpy as np
from glob import glob
from flask import Flask, request, render_template, abort
from cluster import compute_hashes, compute_dists, cluster
from cluster import compute_hashes, compute_dists, cluster, create_directory

import argparse
# Command-line interface: one mode switch plus the two clustering parameters.
parser = argparse.ArgumentParser(description='Image Cluster')

# -s/--server selects the interactive Flask app instead of the batch run.
parser.add_argument(
    '-s', '--server',
    action='store_true',
    help='run flask server (default: %(default)s)\n\n',
)

# --cluster is handed to cluster() as `eps` in the batch run.
parser.add_argument(
    '--cluster',
    type=float,
    default=1,
    metavar='distance',
    help='cluster distance (default: %(default)s)',
)

# --min-samples is handed to cluster() as `min_samples` in the batch run.
parser.add_argument(
    '--min-samples',
    type=int,
    default=1,
    metavar='count',
    help='minimum number of samples in a cluster (default: %(default)s)',
)

# NOTE: parsing happens at module level, i.e. before the __main__ guard.
args = parser.parse_args()


if __name__ == '__main__':

if not create_directory('static/img/'):
print('Couldn\'t create static images directory!')
raise SystemExit

try:
mat = np.load('data/dist_mat.npy')
fnames = json.load(open('data/fnames.json', 'r'))
except FileNotFoundError:
print('computing hashes & distance matrix...')
if not create_directory('data'):
print('Couldn\'t create data directory!')
raise SystemExit

print('Computing hashes & distance matrix...')
hashes, fnames = compute_hashes(glob('static/img/*'))
mat = compute_dists(hashes)

Expand All @@ -19,25 +41,41 @@
json.dump(fnames, f)
np.save('data/dist_mat.npy', mat)

clusters = {}
app = Flask(__name__)

@app.route('/', methods=['GET', 'POST'])
def index():
global clusters
if request.method == 'POST':
eps = float(request.form.get('eps', 20))
min_samples = int(request.form.get('min_samples', 2))
clusters = cluster(mat, fnames, eps, min_samples)
print(clusters)
return render_template('index.html', clusters=clusters)

@app.route('/cluster/<int:id>', methods=['GET', 'POST'])
def view_cluster(id):
try:
print(clusters[id])
return render_template('cluster.html', cluster=clusters[id])
except KeyError:
abort(404)

app.run(host='0.0.0.0', port=5001)

if not args.server:
    # Batch mode: cluster once with the command-line parameters, then sort
    # the image files into one sub-directory of "clusters" per label.
    eps = args.cluster
    min_samples = args.min_samples
    clusters = cluster(mat, fnames, eps, min_samples)

    print('Generated {} clusters!'.format(len(clusters)))
    print('Moving images to "clusters" directory..')
    # The loop variable is deliberately not named `cluster`: that would
    # rebind the imported clustering function of the same name.
    for label, images in clusters.items():
        if not create_directory('clusters/{}'.format(label)):
            # Stop before shutil.move() is pointed at a missing directory.
            print('Couldn\'t create clusters/{} directory!'.format(label))
            raise SystemExit(1)
        for image in images:
            shutil.move(image, 'clusters/{}/'.format(label))
    print('Created {} clusters and stored the images in "clusters" directory!'.format(len(clusters)))

else:
    # Interactive mode: Flask is imported lazily so that the batch run does
    # not need it.
    from flask import Flask, request, render_template, abort

    # Result of the most recent clustering request, shared by both views.
    clusters = {}
    app = Flask(__name__)

    @app.route('/', methods=['GET', 'POST'])
    def index():
        """Render the overview page; on POST, re-cluster with the form values.

        Reads `eps` (default 20) and `min_samples` (default 2) from the
        submitted form and stores the new result in the module-level
        `clusters` so that `view_cluster` can serve it.
        """
        global clusters
        if request.method == 'POST':
            eps = float(request.form.get('eps', 20))
            min_samples = int(request.form.get('min_samples', 2))
            clusters = cluster(mat, fnames, eps, min_samples)
            print(clusters)
        return render_template('index.html', clusters=clusters)

    @app.route('/cluster/<int:id>', methods=['GET', 'POST'])
    def view_cluster(id):
        """Render one cluster by label; respond with 404 for unknown labels."""
        # `id` mirrors the URL variable name in the route above.
        try:
            print(clusters[id])
            return render_template('cluster.html', cluster=clusters[id])
        except KeyError:
            abort(404)

    app.run(host='0.0.0.0', port=5002)
33 changes: 0 additions & 33 deletions tests.py

This file was deleted.