From 85d92dcaa944664e16087aa496598bcbf95f8017 Mon Sep 17 00:00:00 2001
From: Aleksandr Zhdankin <azhdanki@Aleksandrs-MacBook-Pro.local>
Date: Tue, 21 Nov 2023 07:23:04 -0800
Subject: [PATCH 1/7] Initial commit - initial code for file uploading.

---
 azhdankin/README.md        | 29 ++++++++++++++++++++++++
 azhdankin/cloud_storage.py | 34 ++++++++++++++++++++++++++++
 azhdankin/upload_files.py  | 46 ++++++++++++++++++++++++++++++++++++++
 azhdankin/uploader.py      | 16 +++++++++++++
 4 files changed, 125 insertions(+)
 create mode 100644 azhdankin/README.md
 create mode 100644 azhdankin/cloud_storage.py
 create mode 100644 azhdankin/upload_files.py
 create mode 100644 azhdankin/uploader.py

diff --git a/azhdankin/README.md b/azhdankin/README.md
new file mode 100644
index 00000000..53dae762
--- /dev/null
+++ b/azhdankin/README.md
@@ -0,0 +1,29 @@
+# Multi-threaded file uploader (Backend)
+
+> Ideal candidate: skilled python developer with solid knowledge of cloud and distributed systems.
+
+# Overview
+
+Create a python application that uploads a set of given files to a cloud object storage in parallel through the cloud provider's or third party API.
+
+# Requirements
+
+1. Support up to 100,000nds of files, all inside one directory with arbitrary sizes. The root directory may contain subdirectories.
+1. The object storage container which holds the objects is private and only credential-based access is allowed.
+1. Each object inside object storage should have an associated metadata which contains file size, last modification time and file permissions.
+
+# Expectations
+
+- Fast (utilize full network bandwidth), low CPU (do not block all other processes) and low Memory (<25% tentatively) file uploader
+- Support for AWS S3
+- Modular and Object oriented implementation (to add other cloud providers)
+- Clean and documented code
+- Tests
+
+# Timeline
+
+We leave exact timing to the candidate. Must fit Within 5 days total.
+
+# Notes
+
+- we can provide temporary credentials to access AWS/Azure.
diff --git a/azhdankin/cloud_storage.py b/azhdankin/cloud_storage.py
new file mode 100644
index 00000000..d4a8ee28
--- /dev/null
+++ b/azhdankin/cloud_storage.py
@@ -0,0 +1,34 @@
+from google.cloud import storage
+
+class CloudStorage:
+    def __init__(self, name):
+        self.name = name 
+
+    def upload_object(self, object_name, source_file_name):
+        pass
+    
+    def __str__(self):
+        return self.name
+
+class CloudStorageGCP(CloudStorage):
+    def __init__(self, bucket_name, project=None):
+        super().__init__("CloudStorageGCP")
+        self.project = project
+        self.client = storage.Client(project=self.project)
+        buckets = list(self.client.list_buckets())
+        bucket_is_found = False
+        for bucket in buckets:
+          if bucket.name == bucket_name: 
+            self.bucket = self.client.bucket(bucket_name)
+            bucket_is_found = True
+            break
+        if not bucket_is_found:
+           self.bucket = self.client.create_bucket(bucket_name)
+
+    def upload_object(self, object_name, source_file_name):
+        # Create a new blob object
+        blob = self.bucket.blob(object_name)
+        # Upload the file to the bucket
+        blob.upload_from_filename(source_file_name)
+
+
diff --git a/azhdankin/upload_files.py b/azhdankin/upload_files.py
new file mode 100644
index 00000000..e570ce22
--- /dev/null
+++ b/azhdankin/upload_files.py
@@ -0,0 +1,46 @@
+import os
+import glob
+from concurrent.futures import ThreadPoolExecutor
+
+from cloud_storage import CloudStorageGCP
+from uploader import FileUploader
+
+
+path = "./files/*"
+
+file_list = []
+
+for entry in glob.iglob(path, recursive=True):
+  if os.path.isfile(entry):
+    file_list.append(entry)
+  else:
+    entry = entry + "/*"
+    for element in glob.iglob(entry, recursive=True):
+      if os.path.isfile(element):
+        file_list.append(element)
+
+MAX_UPLOAD_WORKERS = 2 
+
+file_list_len = len(file_list)
+
+step = int(file_list_len/MAX_UPLOAD_WORKERS)
+remainder = file_list_len%MAX_UPLOAD_WORKERS
+
+storage = CloudStorageGCP("azhdanki-test-bucket1", project='rewotes')
+
+pool = ThreadPoolExecutor (max_workers=MAX_UPLOAD_WORKERS)
+
+i=0
+
+while i < (file_list_len - remainder):
+  uploader = FileUploader (storage, file_list, i, step)
+  pool.submit (uploader.run())
+  i += step
+
+if remainder > 0:
+  uploader = FileUploader (storage, file_list, i, remainder)
+  pool.submit (uploader.run())
+
+pool.shutdown (wait=True)
+
+
diff --git a/azhdankin/uploader.py b/azhdankin/uploader.py
new file mode 100644
index 00000000..17771a85
--- /dev/null
+++ b/azhdankin/uploader.py
@@ -0,0 +1,16 @@
+import os
+from cloud_storage import CloudStorage
+
+class FileUploader:
+    def __init__(self, storage, files, start_idx, count):
+        self.storage = storage 
+        self.files = files
+        self.start = start_idx
+        self.count = count
+
+    def run (self):
+        for i in range(self.start, self.start + self.count):
+          object_name = os.path.split(self.files[i])[1]
+          self.storage.upload_object(object_name, self.files[i])
+
+

From fd1c644bf10a77fe3fd520163072a233e7da32d1 Mon Sep 17 00:00:00 2001
From: Aleksandr Zhdankin <azhdanki@Aleksandrs-MacBook-Pro.local>
Date: Tue, 21 Nov 2023 10:35:37 -0800
Subject: [PATCH 2/7] Added file gen utility plus some optimization to the code
 creating the bucket.

---
 azhdankin/cloud_storage.py | 13 ++++---------
 azhdankin/create_files.py  | 27 +++++++++++++++++++++++++++
 azhdankin/seed-file.txt    |  3 +++
 azhdankin/upload_files.py  | 13 ++++++++++++-
 azhdankin/uploader.py      |  2 +-
 5 files changed, 47 insertions(+), 11 deletions(-)
 create mode 100644 azhdankin/create_files.py
 create mode 100644 azhdankin/seed-file.txt

diff --git a/azhdankin/cloud_storage.py b/azhdankin/cloud_storage.py
index d4a8ee28..c71142b8 100644
--- a/azhdankin/cloud_storage.py
+++ b/azhdankin/cloud_storage.py
@@ -14,16 +14,11 @@ class CloudStorageGCP(CloudStorage):
     def __init__(self, bucket_name, project=None):
         super().__init__("CloudStorageGCP")
         self.project = project
+        self.bucket_name = bucket_name
         self.client = storage.Client(project=self.project)
-        buckets = list(self.client.list_buckets())
-        bucket_is_found = False
-        for bucket in buckets:
-          if bucket.name == bucket_name: 
-            self.bucket = self.client.bucket(bucket_name)
-            bucket_is_found = True
-            break
-        if not bucket_is_found:
-           self.bucket = self.client.create_bucket(bucket_name)
+        self.bucket = self.client.bucket(self.bucket_name)
+        if not self.bucket.exists():
+          self.bucket = self.client.create_bucket(self.bucket_name)
 
     def upload_object(self, object_name, source_file_name):
         # Create a new blob object
diff --git a/azhdankin/create_files.py b/azhdankin/create_files.py
new file mode 100644
index 00000000..dafed028
--- /dev/null
+++ b/azhdankin/create_files.py
@@ -0,0 +1,27 @@
+import sys
+import os
+import random
+
+path = "./files/"
+name_prefix = "file-2-upload"
+
+num_files = 10
+
+if len(sys.argv) > 1:
+ num_files = int(sys.argv[1])
+
+seed_file = "./seed-file.txt"
+
+file=open(seed_file,"r")
+seed_content = file.read()
+target_file_content = ""
+
+for target_file_idx  in range (0, num_files):
+  repeat = random.randint(1,100)
+  for chunk_num in range (0, repeat):
+    target_file_content = target_file_content + seed_content 
+  target_file = open (path + name_prefix + str(target_file_idx) + ".txt", 'w')
+  target_file.write (target_file_content)  
+  target_file_content = ""
+
+
diff --git a/azhdankin/seed-file.txt b/azhdankin/seed-file.txt
new file mode 100644
index 00000000..6c1513ee
--- /dev/null
+++ b/azhdankin/seed-file.txt
@@ -0,0 +1,3 @@
+dsgsddhjdnkdhfkdfjhekfndsmcndkfhed
+dhfdhfgefgejfhefhekfekfhekfekf
+jhdfhejfgejfgejgfehgfehgfehgfhegf
diff --git a/azhdankin/upload_files.py b/azhdankin/upload_files.py
index e570ce22..f5626e58 100644
--- a/azhdankin/upload_files.py
+++ b/azhdankin/upload_files.py
@@ -1,5 +1,7 @@
 import os
 import glob
+import time
+
 from concurrent.futures import ThreadPoolExecutor
 
 from cloud_storage import CloudStorageGCP
@@ -19,7 +21,7 @@
       if os.path.isfile(element):
         file_list.append(element)
 
-MAX_UPLOAD_WORKERS = 2 
+MAX_UPLOAD_WORKERS = 10
 
 file_list_len = len(file_list)
 
@@ -32,15 +34,24 @@
 
 i=0
 
+time_start = time.time()
+
 while i < (file_list_len - remainder):
+  #storage = CloudStorageGCP("azhdanki-test-bucket1", project='rewotes')
   uploader = FileUploader (storage, file_list, i, step)
   pool.submit (uploader.run())
   i += step
 
 if remainder > 0:
+  #storage = CloudStorageGCP("azhdanki-test-bucket1", project='rewotes')
   uploader = FileUploader (storage, file_list, i, remainder)
   pool.submit (uploader.run())
 
 pool.shutdown (wait=True)
 
+time_end = time.time()
+time_delta = time_end - time_start
+print (time_delta)
+
+
 
diff --git a/azhdankin/uploader.py b/azhdankin/uploader.py
index 17771a85..d536782a 100644
--- a/azhdankin/uploader.py
+++ b/azhdankin/uploader.py
@@ -1,5 +1,5 @@
 import os
-from cloud_storage import CloudStorage
+from cloud_storage import CloudStorageGCP
 
 class FileUploader:
     def __init__(self, storage, files, start_idx, count):

From 5b8fc3a00ca0d7d6c762788427e19a66ad5e4c36 Mon Sep 17 00:00:00 2001
From: Aleksandr Zhdankin <azhdanki@Aleksandrs-MacBook-Pro.local>
Date: Tue, 21 Nov 2023 10:59:28 -0800
Subject: [PATCH 3/7] Duh...

---
 azhdankin/upload_files.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/azhdankin/upload_files.py b/azhdankin/upload_files.py
index f5626e58..43219c09 100644
--- a/azhdankin/upload_files.py
+++ b/azhdankin/upload_files.py
@@ -21,7 +21,7 @@
       if os.path.isfile(element):
         file_list.append(element)
 
-MAX_UPLOAD_WORKERS = 10
+MAX_UPLOAD_WORKERS = 50 
 
 file_list_len = len(file_list)
 
@@ -37,15 +37,16 @@
 time_start = time.time()
 
 while i < (file_list_len - remainder):
+  print(i)
   #storage = CloudStorageGCP("azhdanki-test-bucket1", project='rewotes')
   uploader = FileUploader (storage, file_list, i, step)
-  pool.submit (uploader.run())
+  pool.submit (uploader.run)
   i += step
 
 if remainder > 0:
   #storage = CloudStorageGCP("azhdanki-test-bucket1", project='rewotes')
   uploader = FileUploader (storage, file_list, i, remainder)
-  pool.submit (uploader.run())
+  pool.submit (uploader.run)
 
 pool.shutdown (wait=True)
 

From 85e6a7bf7111e013c37dbe1b4f2292e6f82bf89d Mon Sep 17 00:00:00 2001
From: Aleksandr Zhdankin <azhdanki@Aleksandrs-MacBook-Pro.local>
Date: Tue, 21 Nov 2023 18:02:14 -0800
Subject: [PATCH 4/7] Added documentation.

---
 azhdankin/cloud_storage.py | 12 ++++++++++++
 azhdankin/create_files.py  | 28 ++++++++++++++++++++++++++++
 azhdankin/upload_files.py  | 28 +++++++++++++++++++++-------
 azhdankin/uploader.py      | 20 +++++++++++++++++++-
 4 files changed, 80 insertions(+), 8 deletions(-)
 mode change 100644 => 100755 azhdankin/create_files.py
 mode change 100644 => 100755 azhdankin/upload_files.py

diff --git a/azhdankin/cloud_storage.py b/azhdankin/cloud_storage.py
index c71142b8..807a301b 100644
--- a/azhdankin/cloud_storage.py
+++ b/azhdankin/cloud_storage.py
@@ -1,5 +1,12 @@
+"""
+This module contains the definition of the CloudStorage base class, which acts like an "abstract class" or
+the equivalent of an "interface" for the Cloud storage.
+It also has an implementation of the Cloud storage for GCP, which allows uploading files.
+"""
+
 from google.cloud import storage
 
+#Base CloudStorage class
 class CloudStorage:
     def __init__(self, name):
         self.name = name 
@@ -10,13 +17,18 @@ def upload_object(self, object_name, source_file_name):
     def __str__(self):
         return self.name
 
+#Cloud storage implementation for GCP
 class CloudStorageGCP(CloudStorage):
     def __init__(self, bucket_name, project=None):
         super().__init__("CloudStorageGCP")
         self.project = project
         self.bucket_name = bucket_name
         self.client = storage.Client(project=self.project)
+
+        #Resolve the reference to the destination bucket
         self.bucket = self.client.bucket(self.bucket_name)
+
+        #If target bucket does not exist it will be created
         if not self.bucket.exists():
           self.bucket = self.client.create_bucket(self.bucket_name)
 
diff --git a/azhdankin/create_files.py b/azhdankin/create_files.py
old mode 100644
new mode 100755
index dafed028..674b6582
--- a/azhdankin/create_files.py
+++ b/azhdankin/create_files.py
@@ -1,22 +1,50 @@
+#!python
+""" File creation utility.
+
+This is a utility to create the directory and sample files which
+will be used for transfer to a cloud storage.
+
+The utility takes one command line parameter: number of files to create.
+If the parameter is not given by default it will create 10 files.
+
+The file creation is performed by copying the content of ./seed-file.txt
+n times (where n is a randomly generated number in the range from 1 to 100)
+into a destination file and naming the destination file by appending sequentially
+incremented number to the base file name.
+
+"""
+
+
 import sys
 import os
 import random
 
+#Set the root of the files location and the name prefix for the files to be generated
 path = "./files/"
 name_prefix = "file-2-upload"
 
+#Set the default number of files to be generated
 num_files = 10
 
+#Read the number of files to be generated from the cmd line if provided
 if len(sys.argv) > 1:
  num_files = int(sys.argv[1])
 
+#Specify the "seed" for the generated files' content.
 seed_file = "./seed-file.txt"
 
+#Create the destination directory if it does not exist
+if not os.path.exists(path):
+  os.makedirs(path)
+
+#Populate the seed string for the files to be created and initialize the content
 file=open(seed_file,"r")
 seed_content = file.read()
 target_file_content = ""
 
+#Create the files for upload
 for target_file_idx  in range (0, num_files):
+  #Replicate the seed content a random number of times
   repeat = random.randint(1,100)
   for chunk_num in range (0, repeat):
     target_file_content = target_file_content + seed_content 
diff --git a/azhdankin/upload_files.py b/azhdankin/upload_files.py
old mode 100644
new mode 100755
index 43219c09..74492586
--- a/azhdankin/upload_files.py
+++ b/azhdankin/upload_files.py
@@ -1,3 +1,11 @@
+#!python
+""" Main module to run to upload the files to the Cloud storage.
+
+This program establishes the connection to the Cloud Storage (GCP in this case),
+reads the names of the files available for upload
+and performs parallel upload of the files to the specified Cloud Storage bucket.
+
+"""
 import os
 import glob
 import time
@@ -7,11 +15,14 @@
 from cloud_storage import CloudStorageGCP
 from uploader import FileUploader
 
-
+#Path to the root directory where the files to be uploaded are located
 path = "./files/*"
 
+#Initialize the file names list
 file_list = []
 
+#Populate the list of the file names set for upload. Currently we support two levels of the directories
+#where files are located
 for entry in glob.iglob(path, recursive=True):
   if os.path.isfile(entry):
     file_list.append(entry)
@@ -21,30 +32,33 @@
       if os.path.isfile(element):
         file_list.append(element)
 
-MAX_UPLOAD_WORKERS = 50 
+#Specify the maximum number of the workers that perform files upload simultaneously
+MAX_UPLOAD_WORKERS = 100 
 
-file_list_len = len(file_list)
+#Calculate the partitioning of the file names list - each partition or chunk will be assigned
+#to a single upload worker
 
+file_list_len = len(file_list)
 step = int(file_list_len/MAX_UPLOAD_WORKERS)
 remainder = file_list_len%MAX_UPLOAD_WORKERS
 
+#Initialize a Cloud Storage Provider
 storage = CloudStorageGCP("azhdanki-test-bucket1", project='rewotes')
 
+#Create the Thread Pool which will be used to run the uploader tasks
 pool = ThreadPoolExecutor (max_workers=MAX_UPLOAD_WORKERS)
 
+#Schedule the upload tasks
 i=0
-
 time_start = time.time()
 
 while i < (file_list_len - remainder):
   print(i)
-  #storage = CloudStorageGCP("azhdanki-test-bucket1", project='rewotes')
   uploader = FileUploader (storage, file_list, i, step)
   pool.submit (uploader.run)
   i += step
 
 if remainder > 0:
-  #storage = CloudStorageGCP("azhdanki-test-bucket1", project='rewotes')
   uploader = FileUploader (storage, file_list, i, remainder)
   pool.submit (uploader.run)
 
@@ -52,7 +66,7 @@
 
 time_end = time.time()
 time_delta = time_end - time_start
-print (time_delta)
+print ("It took " + str(time_delta) + " seconds to upload " + str(file_list_len) + " files.")
 
 
 
diff --git a/azhdankin/uploader.py b/azhdankin/uploader.py
index d536782a..7347fbb0 100644
--- a/azhdankin/uploader.py
+++ b/azhdankin/uploader.py
@@ -1,13 +1,31 @@
+"""
+Class performing upload of the files to the cloud storage.
+"""
+
 import os
-from cloud_storage import CloudStorageGCP
 
 class FileUploader:
+    """
+    Constructor is taking the following parameters:
+      storage - a reference to the instance of CloudStorage object.
+                a CloudStorage class is a parent class for the cloud storage
+                provider specific implementations, e.g. CloudStorageGCP, CloudStorageAWS
+
+      files - a reference to the entire list of the file names that need to be uploaded
+
+      start_idx - an index, a "pointer" to the files list specifying where this file uploader 
+                  will start
+ 
+      count - a number which specifies how many files this instance of uploader has to process (upload)
+
+     """
     def __init__(self, storage, files, start_idx, count):
         self.storage = storage 
         self.files = files
         self.start = start_idx
         self.count = count
 
+    #The method performing the upload of the group of the files to the Cloud storage
     def run (self):
         for i in range(self.start, self.start + self.count):
           object_name = os.path.split(self.files[i])[1]

From eefd6025de02d474cbec1f863ae4c71a1d55fbb1 Mon Sep 17 00:00:00 2001
From: Aleksandr Zhdankin <azhdanki@Aleksandrs-MacBook-Pro.local>
Date: Tue, 21 Nov 2023 18:18:54 -0800
Subject: [PATCH 5/7] Updated the README file.

---
 azhdankin/README.md | 32 ++++++++++++++------------------
 1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/azhdankin/README.md b/azhdankin/README.md
index 53dae762..07d5c320 100644
--- a/azhdankin/README.md
+++ b/azhdankin/README.md
@@ -1,29 +1,25 @@
-# Multi-threaded file uploader (Backend)
-
-> Ideal candidate: skilled python developer with solid knowledge of cloud and distributed systems.
+# Multi-threaded file uploader
 
 # Overview
 
-Create a python application that uploads a set of given files to a cloud object storage in parallel through the cloud provider's or third party API.
+This is a python application that uploads a set of given files to a cloud object storage in parallel through the cloud provider's or third party API.
 
-# Requirements
+# Features 
 
 1. Support up to 100,000nds of files, all inside one directory with arbitrary sizes. The root directory may contain subdirectories.
-1. The object storage container which holds the objects is private and only credential-based access is allowed.
-1. Each object inside object storage should have an associated metadata which contains file size, last modification time and file permissions.
-
-# Expectations
+2. The object storage container which holds the objects is private and only credential-based access is allowed.
+3. Each object inside object storage has an associated metadata which contains file size, last modification time and file permissions.
 
-- Fast (utilize full network bandwidth), low CPU (do not block all other processes) and low Memory (<25% tentatively) file uploader
-- Support for AWS S3
-- Modular and Object oriented implementation (to add other cloud providers)
-- Clean and documented code
-- Tests
+ The utility is fast (utilizes full network bandwidth), consumes low CPU (low enough not to block all other processes) and low Memory (<25%).
+ It supports GCP Cloud Storage, however it has a modular and Object oriented implementation so the other cloud providers can be added. 
 
-# Timeline
+# Prerequisites
+  You must have Python 3.8 and the Google Cloud Storage Python client installed.
 
-We leave exact timing to the candidate. Must fit Within 5 days total.
+# To run
+  1.  Clone the git repository. Make sure the create_files.py and upload_files.py file permissions are set to "executable".
+  2.  Run ./create_files.py utility. This utility will create the files that need to be uploaded to the cloud storage. You can set the 
+      number of file sto be created as a cmd line parameter (i.e. ./create_files.py 10000 to create 10000 files).
+  3. Run ./upload_files.py to upload the files to the Cloud Storage.
 
-# Notes
 
-- we can provide temporary credentials to access AWS/Azure.

From 45569b37f21a189c526838bea5e0b5efd457e37c Mon Sep 17 00:00:00 2001
From: Aleksandr Zhdankin <azhdanki@Aleksandrs-MacBook-Pro.local>
Date: Tue, 21 Nov 2023 18:22:02 -0800
Subject: [PATCH 6/7] Updated the README file.

---
 azhdankin/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/azhdankin/README.md b/azhdankin/README.md
index 07d5c320..834c3368 100644
--- a/azhdankin/README.md
+++ b/azhdankin/README.md
@@ -22,4 +22,4 @@ This is a python application that uploads a set of given files to a cloud object
       number of file sto be created as a cmd line parameter (i.e. ./create_files.py 10000 to create 10000 files).
   3. Run ./upload_files.py to upload the files to the Cloud Storage.
 
-
+# Notes

From 7b1e52b6c2bf193478bfbf3291d1ea1aaa4b1d4d Mon Sep 17 00:00:00 2001
From: Aleksandr Zhdankin <azhdanki@Aleksandrs-MacBook-Pro.local>
Date: Tue, 21 Nov 2023 20:16:34 -0800
Subject: [PATCH 7/7] Added samples of the test code.

---
 azhdankin/create_files.py      | 52 ++++++++++++++++++----------------
 azhdankin/test_create_files.py | 40 ++++++++++++++++++++++++++
 2 files changed, 67 insertions(+), 25 deletions(-)
 create mode 100755 azhdankin/test_create_files.py

diff --git a/azhdankin/create_files.py b/azhdankin/create_files.py
index 674b6582..64d23e9a 100755
--- a/azhdankin/create_files.py
+++ b/azhdankin/create_files.py
@@ -13,15 +13,38 @@
 incremented number to the base file name.
 
 """
-
-
 import sys
 import os
 import random
 
+#Performs the file creation
+def create_files (path, num_files):
+   #Specify the "seed" for the generated files' content.
+   seed_file = "./seed-file.txt"
+   name_prefix = "file-2-upload"
+
+   #Create the destination directory if it does not exist
+   if not os.path.exists(path):
+     os.makedirs(path)
+
+   #Populate the seed string for the files to be created and initialize the content
+   file=open(seed_file,"r")
+   seed_content = file.read()
+   target_file_content = ""
+
+   #Create the files for upload
+   for target_file_idx  in range (0, num_files):
+     #Replicate the seed content a random number of times
+     repeat = random.randint(1,100)
+     for chunk_num in range (0, repeat):
+       target_file_content = target_file_content + seed_content
+     target_file = open (path + name_prefix + str(target_file_idx) + ".txt", 'w')
+     target_file.write (target_file_content)
+     target_file_content = ""
+
+
 #Set the root of the files location and the name prefix for the files to be generated
 path = "./files/"
-name_prefix = "file-2-upload"
 
 #Set the default number of files to be generated
 num_files = 10
@@ -30,26 +53,5 @@
 if len(sys.argv) > 1:
  num_files = int(sys.argv[1])
 
-#Specify the "seed" for the generated files' content.
-seed_file = "./seed-file.txt"
-
-#Create the destination directory if it does not exist
-if not os.path.exists(path):
-  os.makedirs(path)
-
-#Populate the seed string for the files to be created and initialize the content
-file=open(seed_file,"r")
-seed_content = file.read()
-target_file_content = ""
-
-#Create the files for upload
-for target_file_idx  in range (0, num_files):
-  #Replicate the seed content a random number of times
-  repeat = random.randint(1,100)
-  for chunk_num in range (0, repeat):
-    target_file_content = target_file_content + seed_content 
-  target_file = open (path + name_prefix + str(target_file_idx) + ".txt", 'w')
-  target_file.write (target_file_content)  
-  target_file_content = ""
-
+create_files(path, num_files)
 
diff --git a/azhdankin/test_create_files.py b/azhdankin/test_create_files.py
new file mode 100755
index 00000000..3b8da6a5
--- /dev/null
+++ b/azhdankin/test_create_files.py
@@ -0,0 +1,40 @@
+#Test for file creation utility.
+import glob
+import os
+from create_files import create_files
+
+#Perform test of file creation
+def test_create_files():
+
+  #Set the root directory where the test files will be generated
+  path = "./test_files/"
+
+  #Set the default number of files to be generated
+  num_files = 10
+
+  create_files(path, num_files)
+
+  #Path to the root directory where the created files  are located
+  path = "./test_files/*"
+
+  #Initialize the file names list
+  file_list = []
+
+  #Populate the list of the file names that were generated. Currently we support two levels of the directories
+  #where files are located
+  for entry in glob.iglob(path, recursive=True):
+    if os.path.isfile(entry):
+      file_list.append(entry)
+    else:
+      entry = entry + "/*"
+      for element in glob.iglob(entry, recursive=True):
+        if os.path.isfile(element):
+          file_list.append(element)
+
+  file_list_len = len(file_list)
+  assert file_list_len == 10
+ 
+
+
+
+