From 961706b27ef792a4a69aa5d79dec60d46d22a27b Mon Sep 17 00:00:00 2001
From: YiwenShaoStephen <sywcs007wow@gmail.com>
Date: Wed, 1 Nov 2017 19:38:29 -0400
Subject: [PATCH 1/3] enable parallel jobs in feature extraction; minor changes
 on formatting

---
 .../s5/local/augment_and_make_feature_vect.py | 288 ------------------
 egs/iam/s5/local/extract_feature.sh           |  70 +++++
 egs/iam/s5/local/make_feature_vect.py         | 246 +++++++++++++--
 egs/iam/s5/local/prepare_lexicon.py           |  51 ++--
 egs/iam/s5/local/process_augment_data.py      |  75 +++++
 egs/iam/s5/local/process_data.py              | 112 +++----
 .../s5/local/unk_arc_post_to_transcription.py | 105 +++----
 egs/iam/s5/run.sh                             |  38 +--
 8 files changed, 516 insertions(+), 469 deletions(-)
 delete mode 100755 egs/iam/s5/local/augment_and_make_feature_vect.py
 create mode 100755 egs/iam/s5/local/extract_feature.sh
 create mode 100755 egs/iam/s5/local/process_augment_data.py

diff --git a/egs/iam/s5/local/augment_and_make_feature_vect.py b/egs/iam/s5/local/augment_and_make_feature_vect.py
deleted file mode 100755
index b1c179d71ed..00000000000
--- a/egs/iam/s5/local/augment_and_make_feature_vect.py
+++ /dev/null
@@ -1,288 +0,0 @@
-#!/usr/bin/env python
-import random
-import argparse
-import os
-import sys
-import scipy.io as sio
-import numpy as np
-from scipy import misc
-from scipy.ndimage.interpolation import affine_transform
-import math
-from signal import signal, SIGPIPE, SIG_DFL
-signal(SIGPIPE, SIG_DFL)
-
-parser = argparse.ArgumentParser(
-    description="""Generates and saves the feature vectors""")
-parser.add_argument(
-    'dir', type=str, help='directory of images.scp and is also output directory')
-parser.add_argument('--seg', type=str, default='1',
-                    help='JOB number of images.JOB.scp if run in parallel mode')
-parser.add_argument('--out-ark', type=str, default='-',
-                    help='where to write the output feature file')
-parser.add_argument('--scale-size', type=int, default=40,
-                    help='size to scale the height of all images')
-parser.add_argument('--padding', type=int, default=5,
-                    help='size to scale the height of all images')
-parser.add_argument('--vertical-shift', type=int, default=10,
-                    help='total number of padding pixel per column')
-args = parser.parse_args()
-
-
-def write_kaldi_matrix(file_handle, matrix, key):
-    file_handle.write(key + " [ ")
-    num_rows = len(matrix)
-    if num_rows == 0:
-        raise Exception("Matrix is empty")
-    num_cols = len(matrix[0])
-
-    for row_index in range(len(matrix)):
-        if num_cols != len(matrix[row_index]):
-            raise Exception("All the rows of a matrix are expected to "
-                            "have the same length")
-        file_handle.write(" ".join(map(lambda x: str(x), matrix[row_index])))
-        if row_index != num_rows - 1:
-            file_handle.write("\n")
-    file_handle.write(" ]\n")
-
-
-def get_scaled_image(im):
-    scale_size = args.scale_size
-    sx = im.shape[1]  # width
-    sy = im.shape[0]  # height
-    scale = (1.0 * scale_size) / sy
-    nx = int(scale_size)
-    ny = int(scale * sx)
-    im = misc.imresize(im, (nx, ny))
-    padding_x = max(5, int((args.padding / 100) * im.shape[1]))
-    padding_y = im.shape[0]
-    im_pad = np.concatenate(
-        (255 * np.ones((padding_y, padding_x), dtype=int), im), axis=1)
-    im_pad1 = np.concatenate(
-        (im_pad, 255 * np.ones((padding_y, padding_x), dtype=int)), axis=1)
-    return im_pad1
-
-
-def contrast_normalization(im, low_pct, high_pct):
-    element_number = im.size
-    rows = im.shape[0]
-    cols = im.shape[1]
-    im_contrast = np.zeros(shape=im.shape)
-    low_index = int(low_pct * element_number)
-    high_index = int(high_pct * element_number)
-    sorted_im = np.sort(im, axis=None)
-    low_thred = sorted_im[low_index]
-    high_thred = sorted_im[high_index]
-    for i in range(rows):
-        for j in range(cols):
-            if im[i, j] > high_thred:
-                im_contrast[i, j] = 255  # lightest to white
-            elif im[i, j] < low_thred:
-                im_contrast[i, j] = 0  # darkest to black
-            else:
-                # linear normalization
-                im_contrast[i, j] = (im[i, j] - low_thred) * \
-                    255 / (high_thred - low_thred)
-    return im_contrast
-
-
-def geometric_moment(frame, p, q):
-    m = 0
-    for i in range(frame.shape[1]):
-        for j in range(frame.shape[0]):
-            m += (i ** p) * (j ** q) * frame[i][i]
-    return m
-
-
-def central_moment(frame, p, q):
-    u = 0
-    x_bar = geometric_moment(frame, 1, 0) / \
-        geometric_moment(frame, 0, 0)  # m10/m00
-    y_bar = geometric_moment(frame, 0, 1) / \
-        geometric_moment(frame, 0, 0)  # m01/m00
-    for i in range(frame.shape[1]):
-        for j in range(frame.shape[0]):
-            u += ((i - x_bar)**p) * ((j - y_bar)**q) * frame[i][j]
-    return u
-
-
-def height_normalization(frame, w, h):
-    frame_normalized = np.zeros(shape=(h, w))
-    alpha = 4
-    x_bar = geometric_moment(frame, 1, 0) / \
-        geometric_moment(frame, 0, 0)  # m10/m00
-    y_bar = geometric_moment(frame, 0, 1) / \
-        geometric_moment(frame, 0, 0)  # m01/m00
-    sigma_x = (alpha * ((central_moment(frame, 2, 0) /
-                         geometric_moment(frame, 0, 0)) ** .5))  # alpha * sqrt(u20/m00)
-    sigma_y = (alpha * ((central_moment(frame, 0, 2) /
-                         geometric_moment(frame, 0, 0)) ** .5))  # alpha * sqrt(u02/m00)
-    for x in range(w):
-        for y in range(h):
-            i = int((x / w - 0.5) * sigma_x + x_bar)
-            j = int((y / h - 0.5) * sigma_y + y_bar)
-            frame_normalized[x][y] = frame[i][j]
-    return frame_normalized
-
-
-def find_slant(im):
-    rows = im.shape[0]
-    cols = im.shape[1]
-    sum_max = 0
-    slant_degree = 0
-    for shear_degree in range(-45, 45, 5):
-        sum = 0
-        shear_rad = shear_degree / 360.0 * 2 * math.pi
-        shear_matrix = np.array([[1, 0],
-                                 [np.tan(shear_rad), 1]])
-        sheared_im = affine_transform(im, shear_matrix, cval=255.0)
-        for j in range(cols):
-            foreground = (sheared_im[:, j] < 100)
-            number = np.sum(foreground)
-            # print(number)
-            if number != 0:
-                start_point = -1
-                end_point = -1
-                start_point = 0
-                for i in range(rows):
-                    if foreground[i] == 1:
-                        start_point = i
-                        break
-                for i in range(rows - 1, -1, -1):
-                    if foreground[i] == 1:
-                        end_point = i
-                        break
-                length = end_point - start_point + 1
-                #print(number, length)
-                if length == number:
-                    sum = sum + number * number
-        #print(shear_degree, sum)
-        if sum > sum_max:
-            sum_max = sum
-            slant_degree = shear_degree
-    return slant_degree
-
-
-def deslant(im, shear):
-    padding_x = int(abs(np.tan(shear)) * im.shape[0])
-    padding_y = im.shape[0]
-    if shear > 0:
-        im_pad = np.concatenate(
-            (255 * np.ones((padding_y, padding_x), dtype=int), im), axis=1)
-    else:
-        im_pad = np.concatenate(
-            (im, 255 * np.ones((padding_y, padding_x), dtype=int)), axis=1)
-
-    shear_matrix = np.array([[1, 0],
-                             [np.tan(shear), 1]])
-    # sheared_im = affine_transform(image, shear_matrix, output_shape=(
-    # im.shape[0], im.shape[1] + abs(int(im.shape[0] * np.tan(shear)))), cval=128.0)
-    sheared_im = affine_transform(im_pad, shear_matrix, cval=255.0)
-    return sheared_im
-
-
-def vertical_shift(im, mode='mid'):
-    total = args.vertical_shift
-    if mode == 'mid':
-        top = total / 2
-        bottom = total - top
-    elif mode == 'top':  # more padding on top
-        top = random.randint(total / 2, total)
-        bottom = total - top
-    elif mode == 'bottom':  # more padding on bottom
-        top = random.randint(0, total / 2)
-        bottom = total - top
-    width = im.shape[1]
-    im_pad = np.concatenate(
-        (255 * np.ones((top, width), dtype=int), im), axis=0)
-    im_pad = np.concatenate(
-        (im_pad, 255 * np.ones((bottom, width), dtype=int)), axis=0)
-    return im_pad
-
-
-def image_augment(im, out_fh, image_id):
-    random.seed(1)
-    shift_setting = ['mid', 'top', 'bottom']
-    image_shift_id = []
-    for i in range(3):
-        image_shift_id.append(image_id + '_shift' + str(i + 1))
-        im_shift = vertical_shift(im, shift_setting[i])
-        im_scaled = get_scaled_image(im_shift)
-        data = np.transpose(im_scaled, (1, 0))
-        data = np.divide(data, 255.0)
-        new_scp_list.append(image_id + '_shift' + str(i + 1))
-        write_kaldi_matrix(out_fh, data, image_shift_id[i])
-
-
-# main #
-new_scp_list = list()
-text_file = os.path.join(args.dir, 'backup', 'text')
-text_dict = dict()  # stores imageID and text
-
-with open(text_file) as text_fh:
-    for uttID_text in text_fh:
-        uttID_text = uttID_text.strip()
-        uttID_text_vect = uttID_text.split(" ")
-        uttID = uttID_text_vect[0]
-        imageID = uttID.split("_")[1]
-        text_vect = uttID_text_vect[1:]
-        text = " ".join(text_vect)
-        text_dict[imageID] = text
-
-utt2spk_file = os.path.join(args.dir, 'backup', 'utt2spk')
-uttID_spk_dict = dict()  # stores imageID and speaker
-
-with open(utt2spk_file) as utt2spk_fh:
-    for uttID_spk in utt2spk_fh:
-        uttID_spk = uttID_spk.strip()
-        uttID_spk_vect = uttID_spk.split(" ")
-        uttID = uttID_spk_vect[0]
-        imageID = uttID.split("_")[1]
-        spk = uttID_spk_vect[1]
-        uttID_spk_dict[imageID] = spk
-
-image_file = os.path.join(args.dir, 'backup', 'images.scp')
-uttID_path_dict = dict()  # stores imageID and image path
-
-with open(image_file) as image_fh:
-    for uttID_path in image_fh:
-        uttID_path = uttID_path.strip()
-        uttID_path_vect = uttID_path.split(" ")
-        uttID = uttID_path_vect[0]
-        imageID = uttID.split("_")[1]
-        path = uttID_path_vect[1]
-        uttID_path_dict[imageID] = path
-
-scp_name = 'images.scp'
-data_list_path = os.path.join(args.dir, 'backup', scp_name)
-
-if args.out_ark == '-':
-    out_fh = sys.stdout
-else:
-    out_fh = open(args.out_ark, 'wb')
-
-text_file = os.path.join(args.dir, 'text')
-text_fh = open(text_file, 'w+')
-
-utt2spk_file = os.path.join(args.dir, 'utt2spk')
-utt2spk_fh = open(utt2spk_file, 'w+')
-
-image_file = os.path.join(args.dir, 'images.scp')
-image_fh = open(image_file, 'w+')
-
-with open(data_list_path) as f:
-    for line in f:
-        line = line.strip()
-        line_vect = line.split(' ')
-        image_id = line_vect[0]
-        image_path = line_vect[1]
-        im = misc.imread(image_path)
-        #im_contrast = contrast_normalization(im, 0.05, 0.2)
-        #shear = (find_slant(im_contrast) / 360.0) * 2 * math.pi
-        im_scaled = get_scaled_image(im)
-        image_augment(im_scaled, out_fh, image_id)
-
-for uttID in new_scp_list:
-    imageID = uttID.split("_")[1]
-    text_fh.write(uttID + ' ' + text_dict[imageID] + '\n')
-    utt2spk_fh.write(uttID + ' ' + uttID_spk_dict[imageID] + '\n')
-    image_fh.write(uttID + ' ' + uttID_path_dict[imageID] + '\n')
diff --git a/egs/iam/s5/local/extract_feature.sh b/egs/iam/s5/local/extract_feature.sh
new file mode 100755
index 00000000000..d7b4ba79a54
--- /dev/null
+++ b/egs/iam/s5/local/extract_feature.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+nj=4
+cmd=run.pl
+compress=true
+scale_size=40
+vertical_shift=10
+horizontal_shear=45
+augment=false
+echo "$0 $@"
+
+. utils/parse_options.sh || exit 1;
+
+data=$1
+featdir=$data/data
+logdir=$data/log
+
+# make $featdir an absolute pathname
+featdir=`perl -e '($dir,$pwd)= @ARGV; if($dir!~m:^/:) { $dir = "$pwd/$dir"; } print $dir; ' $featdir ${PWD}`
+
+if [ -f $data/feats.scp ]; then
+    mkdir -p $data/.backup
+    echo "$0: moving $data/feats.scp to $data/.backup"
+    mv $data/feats.scp $data/.backup
+fi
+
+# back up the originals on the first run (cp, not mv: images.scp is read below)
+if [ $augment = true ] && [[ $data = *'train'* ]]; then
+  if [ ! -d $data/backup ]; then
+    mkdir -p $data/backup
+    cp $data/text $data/utt2spk $data/images.scp $data/backup/
+  else
+    cp $data/backup/* $data
+  fi
+fi
+
+scp=$data/images.scp
+for n in $(seq $nj); do
+    split_scps="$split_scps $logdir/images.$n.scp"
+done
+mkdir -p $featdir $logdir || exit 1;  # the split lists and the arks go here
+utils/split_scp.pl $scp $split_scps || exit 1;
+
+
+# run one extraction job per split list; each job pipes the text-format
+# matrices written by make_feature_vect.py into copy-feats.
+$cmd JOB=1:$nj $logdir/extract_feature.JOB.log \
+  local/make_feature_vect.py $logdir --job JOB --scale-size $scale_size --augment $augment --horizontal-shear $horizontal_shear \| \
+    copy-feats --compress=$compress --compression-method=7 ark:- \
+    ark,scp:$featdir/images.JOB.ark,$featdir/images.JOB.scp \
+    || exit 1;  
+
+# concatenate the .scp files together.
+for n in $(seq $nj); do
+  cat $featdir/images.$n.scp || exit 1;
+done > $data/feats.scp || exit 1
+
+# re-map utt2spk, images.scp and text if doing image augmentation
+# on training set
+if [ $augment = true ] && [[ $data = *'train'* ]]; then
+  local/process_augment_data.py $data
+  utils/utt2spk_to_spk2utt.pl $data/utt2spk > $data/spk2utt 
+fi    
+
+nf=`cat $data/feats.scp | wc -l`
+nu=`cat $data/utt2spk | wc -l`
+if [ $nf -ne $nu ]; then
+    echo "It seems not all of the feature files were successfully processed ($nf != $nu);"
+    echo "consider using utils/fix_data_dir.sh $data"
+fi
diff --git a/egs/iam/s5/local/make_feature_vect.py b/egs/iam/s5/local/make_feature_vect.py
index dd35f1b14c7..697f9d92b86 100755
--- a/egs/iam/s5/local/make_feature_vect.py
+++ b/egs/iam/s5/local/make_feature_vect.py
@@ -1,20 +1,34 @@
 #!/usr/bin/env python
-
+import random
 import argparse
 import os
 import sys
 import scipy.io as sio
 import numpy as np
 from scipy import misc
-
+from scipy.ndimage.interpolation import affine_transform
+import math
 from signal import signal, SIGPIPE, SIG_DFL
-signal(SIGPIPE,SIG_DFL)
+signal(SIGPIPE, SIG_DFL)
 
-parser = argparse.ArgumentParser(description="""Generates and saves the feature vectors""")
-parser.add_argument('dir', type=str, help='directory of images.scp and is also output directory')
-parser.add_argument('--out-ark', type=str, default='-', help='where to write the output feature file')
-parser.add_argument('--scale-size', type=int, default=40, help='size to scale the height of all images')
-parser.add_argument('--padding', type=int, default=5, help='size to scale the height of all images')
+parser = argparse.ArgumentParser(
+    description="""Generates and saves the feature vectors""")
+parser.add_argument(
+    'dir', type=str, help='directory of images.scp and is also output directory')
+parser.add_argument('--job', type=str, default='',
+                    help='JOB number of images.JOB.scp')
+parser.add_argument('--out-ark', type=str, default='-',
+                    help='where to write the output feature file')
+parser.add_argument('--scale-size', type=int, default=40,
+                    help='size to scale the height of all images')
+parser.add_argument('--padding', type=int, default=5,
+                    help='size to scale the height of all images')
+parser.add_argument('--augment', type=str, default='false',
+                    help='whether or not to do image augmentation on training set')
+parser.add_argument('--vertical-shift', type=int, default=10,
+                    help='total number of padding pixel per column')
+parser.add_argument('--horizontal-shear', type=int, default=45,
+                    help='maximum horizontal shearing degree')
 args = parser.parse_args()
 
 
@@ -34,38 +48,212 @@ def write_kaldi_matrix(file_handle, matrix, key):
             file_handle.write("\n")
     file_handle.write(" ]\n")
 
+
 def get_scaled_image(im):
     scale_size = args.scale_size
-    sx = im.shape[1]
-    sy = im.shape[0]
+    sx = im.shape[1]  # width
+    sy = im.shape[0]  # height
     scale = (1.0 * scale_size) / sy
     nx = int(scale_size)
     ny = int(scale * sx)
     im = misc.imresize(im, (nx, ny))
-    padding_x = max(5,int((args.padding/100)*im.shape[1]))
+    padding_x = max(5, int((args.padding / 100) * im.shape[1]))
     padding_y = im.shape[0]
-    im_pad = np.concatenate((255 * np.ones((padding_y,padding_x), dtype=int), im), axis=1)
-    im_pad1 = np.concatenate((im_pad,255 * np.ones((padding_y, padding_x), dtype=int)), axis=1)
+    im_pad = np.concatenate(
+        (255 * np.ones((padding_y, padding_x), dtype=int), im), axis=1)
+    im_pad1 = np.concatenate(
+        (im_pad, 255 * np.ones((padding_y, padding_x), dtype=int)), axis=1)
     return im_pad1
 
-### main ###
-data_list_path = os.path.join(args.dir,'images.scp')
 
+def contrast_normalization(im, low_pct, high_pct):
+    element_number = im.size
+    rows = im.shape[0]
+    cols = im.shape[1]
+    im_contrast = np.zeros(shape=im.shape)
+    low_index = int(low_pct * element_number)
+    high_index = int(high_pct * element_number)
+    sorted_im = np.sort(im, axis=None)
+    low_thred = sorted_im[low_index]
+    high_thred = sorted_im[high_index]
+    for i in range(rows):
+        for j in range(cols):
+            if im[i, j] > high_thred:
+                im_contrast[i, j] = 255  # lightest to white
+            elif im[i, j] < low_thred:
+                im_contrast[i, j] = 0  # darkest to black
+            else:
+                # linear normalization
+                im_contrast[i, j] = (im[i, j] - low_thred) * \
+                    255 / (high_thred - low_thred)
+    return im_contrast
+
+
+def geometric_moment(frame, p, q):
+    """Return the raw image moment m_pq = sum of x**p * y**q * frame[y][x]."""
+    # x (index i) runs over the columns and y (index j) over the rows
+    return sum((i ** p) * (j ** q) * frame[j][i]
+               for i in range(frame.shape[1])
+               for j in range(frame.shape[0]))
+
+
+def central_moment(frame, p, q):
+    """Return the central moment u_pq of frame about its intensity centroid."""
+    m00 = geometric_moment(frame, 0, 0)
+    x_bar = geometric_moment(frame, 1, 0) / m00  # m10/m00
+    y_bar = geometric_moment(frame, 0, 1) / m00  # m01/m00
+    u = 0
+    for i in range(frame.shape[1]):  # i: column index (x)
+        for j in range(frame.shape[0]):  # j: row index (y)
+            u += ((i - x_bar)**p) * ((j - y_bar)**q) * frame[j][i]
+    return u
+
+
+def height_normalization(frame, w, h):
+    """Resample a centroid-centred window of frame to h rows by w columns."""
+    frame_normalized = np.zeros(shape=(h, w))
+    alpha = 4  # window extent, in standard deviations of the pixel mass
+    m00 = geometric_moment(frame, 0, 0)
+    x_bar = geometric_moment(frame, 1, 0) / m00  # m10/m00
+    y_bar = geometric_moment(frame, 0, 1) / m00  # m01/m00
+    sigma_x = alpha * (central_moment(frame, 2, 0) / m00) ** .5  # alpha * sqrt(u20/m00)
+    sigma_y = alpha * (central_moment(frame, 0, 2) / m00) ** .5  # alpha * sqrt(u02/m00)
+    for x in range(w):
+        for y in range(h):
+            # float() keeps the ratio fractional under Python 2 integer division
+            i = int((float(x) / w - 0.5) * sigma_x + x_bar)  # source column
+            j = int((float(y) / h - 0.5) * sigma_y + y_bar)  # source row
+            # both arrays are indexed row first, then column
+            frame_normalized[y][x] = frame[j][i]
+    return frame_normalized
+
+
+def find_slant_project(im):
+    """Estimate the slant of im in whole degrees from sheared projections.
+
+    Pixels below 100 are projected onto columns under each shear angle in
+    [-45, 45); the negated angle with the largest histogram std is returned.
+    """
+    n_rows, n_cols = im.shape[0], im.shape[1]
+    angles = range(-45, 45)
+    proj = np.zeros(shape=(90, n_cols + 2 * n_rows), dtype=int)
+    for r in range(n_rows):
+        # horizontal displacement of row r under each candidate angle
+        shift = [int(r * math.tan(a / 180.0 * math.pi)) for a in angles]
+        for c in range(n_cols):
+            if im[r, c] < 100:
+                for k in range(90):
+                    proj[k, c + shift[k] + n_rows] += 1
+    best_std, best_alpha = 0, 0
+    for k, alpha in enumerate(angles):
+        spread = np.std(np.histogram(proj[k, :], bins=10)[0])
+        if spread > best_std:
+            best_std, best_alpha = spread, alpha
+    return -best_alpha
+
+
+def horizontal_shear(im, degree):
+    """Shear im horizontally by degree degrees, filling with white (255).
+
+    Blank columns are added on the left for a positive angle and on the
+    right for a negative one before the affine transform is applied.
+    """
+    rad = degree / 180.0 * math.pi
+    tangent = np.tan(rad)
+    height = im.shape[0]
+    margin = 255 * np.ones((height, int(abs(tangent) * height)), dtype=int)
+    if rad > 0:
+        im_pad = np.concatenate((margin, im), axis=1)
+    elif rad < 0:
+        im_pad = np.concatenate((im, margin), axis=1)
+    else:
+        im_pad = im
+    shear_matrix = np.array([[1, 0], [tangent, 1]])
+    return affine_transform(im_pad, shear_matrix, cval=255.0)
+
+
+def vertical_shift(im, mode='mid'):
+    """Add args.vertical_shift rows of near-white padding above and below im.
+
+    mode 'mid' puts half of the rows on top; 'top' and 'bottom' draw the
+    number of top rows at random from the upper or lower half of the range.
+    """
+    total = args.vertical_shift
+    half = total // 2  # floor division: row counts must be integers
+    if mode == 'mid':
+        top = half
+    elif mode == 'top':  # more padding on top
+        top = random.randint(half, total)
+    elif mode == 'bottom':  # more padding on bottom
+        top = random.randint(0, half)
+    bottom = total - top
+    pads = [255 * np.ones((n, im.shape[1]), dtype=int) -
+            np.random.normal(2, 1, (n, im.shape[1])).astype(int)
+            for n in (top, bottom)]
+    return np.concatenate((pads[0], im, pads[1]), axis=0)
+
+
+def image_augment(im, out_fh, image_id):
+    """Write three sheared variants of im to out_fh as Kaldi text matrices.
+
+    im is deslanted by the angle from find_slant_project and then sheared by
+    0, by a random angle in [0, args.horizontal_shear] and by a random angle
+    in [-args.horizontal_shear, 0]. Variants are keyed image_id + '_shear1'
+    to '_shear3'; each is transposed and divided by 255 before writing.
+    """
+    slant_degree = find_slant_project(im)
+    max_shear = args.horizontal_shear
+    # both random angles are drawn up front, before any image is sheared
+    shear_degrees = [0, random.randint(0, max_shear),
+                     random.randint(-max_shear, 0)]
+    im_deslanted = horizontal_shear(im, slant_degree)
+    for idx, degree in enumerate(shear_degrees, 1):
+        im_shear = horizontal_shear(im_deslanted, degree)
+        # after the transpose each matrix row holds one image column
+        data = np.transpose(im_shear, (1, 0))
+        data = np.divide(data, 255.0)
+        write_kaldi_matrix(out_fh, data, image_id + '_shear' + str(idx))
+
+
+# main #
+
+random.seed(1)
+
+scp_name = 'images.' + args.job + '.scp'  # parallel
+data_list_path = os.path.join(args.dir, scp_name)
+# output dir of feature matrix
 if args.out_ark == '-':
     out_fh = sys.stdout
 else:
-    out_fh = open(args.out_ark,'wb')
-
-with open(data_list_path) as f:
-    for line in f:
-        line = line.strip()
-        line_vect = line.split(' ')
-        image_id = line_vect[0]
-        image_path = line_vect[1]
-        im = misc.imread(image_path)
-        im_scale = get_scaled_image(im)
-        
-        data = np.transpose(im_scale, (1, 0))
-        data = np.divide(data, 255.0)
-        write_kaldi_matrix(out_fh, data, image_id)
+    out_fh = open(args.out_ark, 'wb')
+
+
+if (args.augment == 'true') and ('train' in args.dir):
+    # only do image augmentation for training data
+    with open(data_list_path) as f:
+        for line in f:
+            line = line.strip()
+            line_vect = line.split(' ')
+            image_id = line_vect[0]
+            image_path = line_vect[1]
+            im = misc.imread(image_path)
+            # NOTE(review): no contrast normalization here, unlike the branch below
+            im_scaled = get_scaled_image(im)
+            image_augment(im_scaled, out_fh, image_id)
 
+else:  # settings for without augmentation or test data
+    with open(data_list_path) as f:
+        for line in f:
+            line = line.strip()
+            line_vect = line.split(' ')
+            image_id = line_vect[0]
+            image_path = line_vect[1]
+            im = misc.imread(image_path)
+            im_scaled = get_scaled_image(im)
+            im_contrast = contrast_normalization(im_scaled, 0.05, 0.2)
+            # slant_degree = find_slant_project(im_contrast)
+            # im_sheared = horizontal_shear(im_contrast, slant_degree)
+            # im_padded = vertical_shift(im_scaled, 10)
+            data = np.transpose(im_contrast, (1, 0))
+            data = np.divide(data, 255.0)
+            write_kaldi_matrix(out_fh, data, image_id)
diff --git a/egs/iam/s5/local/prepare_lexicon.py b/egs/iam/s5/local/prepare_lexicon.py
index 86298c45733..6a31e635a18 100755
--- a/egs/iam/s5/local/prepare_lexicon.py
+++ b/egs/iam/s5/local/prepare_lexicon.py
@@ -4,9 +4,11 @@
 import os
 import sys
 
-parser = argparse.ArgumentParser(description="""Creates the list of characters and words in lexicon""")
+parser = argparse.ArgumentParser(
+    description="""Creates the list of characters and words in lexicon""")
 parser.add_argument('database_path', type=str, help='path to train text file')
-parser.add_argument('test_text', type=str, help='path to test text file to include it in lexicon')
+parser.add_argument('test_text', type=str,
+                    help='path to test text file to include it in lexicon')
 parser.add_argument('dir', type=str, help='output path')
 args = parser.parse_args()
 
@@ -14,33 +16,32 @@
 char = {}
 lex = {}
 
-text_path = os.path.join(args.database_path,'text')
+text_path = os.path.join(args.database_path, 'text')
 with open(text_path) as f:
-  for line in f:
-    line = line.strip()
-    line_vect = line.split(' ')
-    for i in range(1,len(line_vect)):
-      characters = list(line_vect[i])
-      entry = " ".join(characters)
-      entry = entry.replace("#", "<HASH>")
-      if line_vect[i]:
-        lex[line_vect[i]] = entry
-
-if args.test_text > 1:
-  text_path = os.path.join(args.test_text,'text')
-  with open(text_path) as f:
     for line in f:
-      line = line.strip()
-      line_vect = line.split(' ')
-      for i in range(1,len(line_vect)):
-        characters = list(line_vect[i])
-        entry = " ".join(characters)
-        entry = entry.replace("#", "<HASH>")
-        if line_vect[i]:
-          lex[line_vect[i]] = entry
+        line = line.strip()
+        line_vect = line.split(' ')
+        for i in range(1, len(line_vect)):
+            characters = list(line_vect[i])
+            entry = " ".join(characters)
+            entry = entry.replace("#", "<HASH>")
+            if line_vect[i]:
+                lex[line_vect[i]] = entry
 
+# test_text is a directory path (a str), so test it for emptiness instead of
+# comparing it with an int, which raises TypeError on Python 3.
+if args.test_text:
+    text_path = os.path.join(args.test_text, 'text')
+    with open(text_path) as f:
+        for line in f:
+            line_vect = line.strip().split(' ')
+            # skip the first field of every line (presumably the utterance id)
+            for word in line_vect[1:]:
+                if word:
+                    # spell the word as characters; '#' is written as <HASH>
+                    lex[word] = " ".join(word).replace("#", "<HASH>")
 
 lex_file = os.path.join(args.dir, 'lexicon.txt')
 lex_fh = open(lex_file, 'w+')
 for key in sorted(lex):
-  lex_fh.write(key + " " + lex[key] + "\n")
+    lex_fh.write(key + " " + lex[key] + "\n")
diff --git a/egs/iam/s5/local/process_augment_data.py b/egs/iam/s5/local/process_augment_data.py
new file mode 100755
index 00000000000..e5f98b44f46
--- /dev/null
+++ b/egs/iam/s5/local/process_augment_data.py
@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+import os
+import argparse
+
+parser = argparse.ArgumentParser(
+    description="""Regenerate images.scp, text, utt2spk and spk2utt from feats.scp for augment data""")
+parser.add_argument(
+    'dir', type=str, help='directory of images.scp')
+args = parser.parse_args()
+
+
+text_file = os.path.join(args.dir, 'backup', 'text')
+#text_file = os.path.join(args.dir, 'text.txt')
+text_dict = dict()  # stores imageID and text
+
+with open(text_file) as text_fh:
+    for uttID_text in text_fh:
+        uttID_text = uttID_text.strip()
+        uttID_text_vect = uttID_text.split(" ")
+        uttID = uttID_text_vect[0]
+        imageID = uttID.split("_")[1]
+        text_vect = uttID_text_vect[1:]
+        text = " ".join(text_vect)
+        text_dict[imageID] = text
+        # print "%s: %s" % (imageID, text)
+
+utt2spk_file = os.path.join(args.dir, 'backup', 'utt2spk')
+#utt2spk_file = os.path.join(args.dir, 'utt2spk')
+uttID_spk_dict = dict()  # stores imageID and speaker
+
+with open(utt2spk_file) as utt2spk_fh:
+    for uttID_spk in utt2spk_fh:
+        uttID_spk = uttID_spk.strip()
+        uttID_spk_vect = uttID_spk.split(" ")
+        uttID = uttID_spk_vect[0]
+        imageID = uttID.split("_")[1]
+        spk = uttID_spk_vect[1]
+        uttID_spk_dict[imageID] = spk
+        # print "%s: %s" % (imageID, spk)
+
+image_file = os.path.join(args.dir, 'backup', 'images.scp')
+#image_file = os.path.join(args.dir, 'images.scp')
+uttID_path_dict = dict()  # stores imageID and image path
+
+with open(image_file) as image_fh:
+    for uttID_path in image_fh:
+        uttID_path = uttID_path.strip()
+        uttID_path_vect = uttID_path.split(" ")
+        uttID = uttID_path_vect[0]
+        imageID = uttID.split("_")[1]
+        path = uttID_path_vect[1]
+        uttID_path_dict[imageID] = path
+        # print "%s: %s" % (imageID, path)
+
+
+image_file = os.path.join(args.dir + '/', 'images.scp')
+text_file = os.path.join(args.dir + '/', 'text')
+utt2spk_file = os.path.join(args.dir + '/', 'utt2spk')
+feats_scp_file = os.path.join(args.dir, 'feats.scp')
+
+print('generate new files')
+# Emit one images.scp/text/utt2spk entry per key in feats.scp, looked up by
+# the second '_'-separated field of the key. The with-block closes and
+# flushes all three outputs on exit.
+with open(image_file, 'w+') as image_fh, \
+        open(text_file, 'w+') as text_fh, \
+        open(utt2spk_file, 'w+') as utt2spk_fh, \
+        open(feats_scp_file) as feats_scp_fh:
+    for uttID_image_path in feats_scp_fh:
+        uttID_path_vect = uttID_image_path.strip().split(" ")
+        uttID = uttID_path_vect[0]
+        imageID = uttID.split("_")[1]
+        text_fh.write(uttID + ' ' + text_dict[imageID] + '\n')
+        utt2spk_fh.write(uttID + ' ' + uttID_spk_dict[imageID] + '\n')
+        image_fh.write(uttID + ' ' + uttID_path_dict[imageID] + '\n')
diff --git a/egs/iam/s5/local/process_data.py b/egs/iam/s5/local/process_data.py
index f9838d34563..ca954abac50 100755
--- a/egs/iam/s5/local/process_data.py
+++ b/egs/iam/s5/local/process_data.py
@@ -8,19 +8,19 @@
 import xml.dom.minidom as minidom
 
 parser = argparse.ArgumentParser(description="""Creates text utt2spk 
-                                                and image file """)
+                                 and image file """)
 parser.add_argument('database_path', type=str,
                     help='path to downloaded iam data')
 parser.add_argument('out_dir', type=str,
                     help='where to write output files')
+parser.add_argument('--model_type', type=str, default='word',
+                    choices=['word', 'character'],
+                    help='word model or character model')
 parser.add_argument('dataset_dir', type=str,
                     help='directory containing dataset')
 parser.add_argument('--dataset', type=str, default='new_trainset',
-                    choices=['new_trainset', 'new_testset','new_valset'],
-                    help='choose new_trainset, testset')
-parser.add_argument('--model_type', type=str,default='word',
-                    choices=['word', 'character'],
-                    help='word model or character model')
+                    choices=['new_trainset', 'new_testset', 'new_valset'],
+                    help='choose new_trainset, new_testset, new_valset')
 args = parser.parse_args()
 
 ### main ###
@@ -37,61 +37,65 @@
                             args.dataset + '.txt')
 
 text_file_path = os.path.join(args.database_path,
-                               'ascii','lines.txt')
+                              'ascii', 'lines.txt')
 text_dict = {}
+
+
 def process_text_file_for_word_model():
-  with open (text_file_path, 'rt') as in_file:
-    for line in in_file:
-      if line[0]=='#':
-        continue
-      line = line.strip()
-      line_vect = line.split(' ')
-      text_vect = line.split(' ')[8:]
-      text = "".join(text_vect)
-      text = text.replace("|", " ")
-      text_dict[line_vect[0]] = text
+    with open(text_file_path, 'rt') as in_file:
+        for line in in_file:
+            if line[0] == '#':  # skip lines beginning with '#'
+                continue
+            line = line.strip()
+            line_vect = line.split(' ')  # space-separated fields; field 0 is the dict key
+            text_vect = line.split(' ')[8:]  # every field from index 8 onward
+            text = "".join(text_vect)  # joined with no separator
+            text = text.replace("|", " ")  # '|' is turned into a space
+            text_dict[line_vect[0]] = text
+
 
 def process_text_file_for_char_model():
-  with open (text_file_path, 'rt') as in_file:
-    for line in in_file:
-      if line[0]=='#':
-        continue
-      line = line.strip()
-      line_vect = line.split(' ')
-      text_vect = line.split(' ')[8:]
-      text = "".join(text_vect)
-      characters = list(text)
-      spaced_characters = " ".join(characters)
-      spaced_characters = spaced_characters.replace("|", "SIL")
-      spaced_characters = "SIL " + spaced_characters
-      spaced_characters = spaced_characters + " SIL"
-      text_dict[line_vect[0]] = spaced_characters
+    with open(text_file_path, 'rt') as in_file:
+        for line in in_file:
+            if line[0] == '#':  # skip lines beginning with '#'
+                continue
+            line = line.strip()
+            line_vect = line.split(' ')  # space-separated fields; field 0 is the dict key
+            text_vect = line.split(' ')[8:]  # every field from index 8 onward
+            text = "".join(text_vect)
+            characters = list(text)  # one list element per character
+            spaced_characters = " ".join(characters)
+            spaced_characters = spaced_characters.replace("|", "SIL")  # '|' -> SIL token
+            spaced_characters = "SIL " + spaced_characters  # SIL added at the start ...
+            spaced_characters = spaced_characters + " SIL"  # ... and at the end
+            text_dict[line_vect[0]] = spaced_characters
 
 
-if args.model_type=='word':
-  print 'processing word model'
-  process_text_file_for_word_model()
+if args.model_type == 'word':
+    print 'processing word model'
+    process_text_file_for_word_model()
 else:
-  print 'processing char model'
-  process_text_file_for_char_model()
+    print 'processing char model'
+    process_text_file_for_char_model()
 
 with open(dataset_path) as f:
-  for line in f:
-    line = line.strip()
-    line_vect = line.split('-')
-    xml_file = line_vect[0] + '-' + line_vect[1]
-    xml_path = os.path.join(args.database_path, 'xml', xml_file + '.xml')
-    img_num = line[-3:] 
-    doc = minidom.parse(xml_path)
+    for line in f:
+        line = line.strip()
+        line_vect = line.split('-')
+        xml_file = line_vect[0] + '-' + line_vect[1]
+        xml_path = os.path.join(args.database_path, 'xml', xml_file + '.xml')
+        img_num = line[-3:]
+        doc = minidom.parse(xml_path)
 
-    form_elements = doc.getElementsByTagName('form')[0]
-    writer_id = form_elements.getAttribute('writer-id')
-    outerfolder = form_elements.getAttribute('id')[0:3]
-    innerfolder = form_elements.getAttribute('id')
-    lines_path = os.path.join(args.database_path, 'lines', outerfolder, innerfolder, innerfolder)
-    image_file_path = lines_path + img_num + '.png'
-    text =  text_dict[line]
-    utt_id = writer_id + '_' + line
-    text_fh.write(utt_id + ' ' + text + '\n')
-    utt2spk_fh.write(utt_id + ' ' + writer_id + '\n')
-    image_fh.write(utt_id + ' ' + image_file_path + '\n')
+        form_elements = doc.getElementsByTagName('form')[0]  # first <form> element
+        writer_id = form_elements.getAttribute('writer-id')
+        outerfolder = form_elements.getAttribute('id')[0:3]  # first 3 chars of the form id
+        innerfolder = form_elements.getAttribute('id')
+        lines_path = os.path.join(
+            args.database_path, 'lines', outerfolder, innerfolder, innerfolder)
+        image_file_path = lines_path + img_num + '.png'  # img_num = last 3 chars of the line id
+        text = text_dict[line]  # KeyError if the id is absent from text_dict
+        utt_id = writer_id + '_' + line  # utterance id is <writer-id>_<line id>
+        text_fh.write(utt_id + ' ' + text + '\n')
+        utt2spk_fh.write(utt_id + ' ' + writer_id + '\n')
+        image_fh.write(utt_id + ' ' + image_file_path + '\n')
diff --git a/egs/iam/s5/local/unk_arc_post_to_transcription.py b/egs/iam/s5/local/unk_arc_post_to_transcription.py
index c27bf226cf9..ee09db54702 100755
--- a/egs/iam/s5/local/unk_arc_post_to_transcription.py
+++ b/egs/iam/s5/local/unk_arc_post_to_transcription.py
@@ -5,82 +5,85 @@
 import sys
 import numpy as np
 from scipy import misc
-parser = argparse.ArgumentParser(description="""uses phones to convert unk to word""")
+parser = argparse.ArgumentParser(
+    description="""uses phones to convert unk to word""")
 parser.add_argument('phones', type=str, help='phones and phonesID')
 parser.add_argument('words', type=str, help='word and wordID')
 parser.add_argument('unk', type=str, default='-', help='location of unk file')
-parser.add_argument('--input-ark', type=str, default='-', help='where to read the input data')
-parser.add_argument('--out-ark', type=str, default='-', help='where to write the output data')
+parser.add_argument('--input-ark', type=str, default='-',
+                    help='where to read the input data')
+parser.add_argument('--out-ark', type=str, default='-',
+                    help='where to write the output data')
 args = parser.parse_args()
 ### main ###
 phone_fh = open(args.phones, 'r')
 word_fh = open(args.words, 'r')
-unk_fh = open(args.unk,'r')
+unk_fh = open(args.unk, 'r')
 if args.input_ark == '-':
     input_fh = sys.stdin
 else:
-    input_fh = open(args.input_ark,'r')
+    input_fh = open(args.input_ark, 'r')
 if args.out_ark == '-':
     out_fh = sys.stdout
 else:
-    out_fh = open(args.out_ark,'wb')
+    out_fh = open(args.out_ark, 'wb')
 
-phone_dict = dict()# stores phoneID and phone mapping
+phone_dict = dict()  # stores phoneID and phone mapping
 phone_data_vect = phone_fh.read().strip().split("\n")
 for key_val in phone_data_vect:
-  key_val = key_val.split(" ")
-  phone_dict[key_val[1]] = key_val[0]
+    key_val = key_val.split(" ")
+    phone_dict[key_val[1]] = key_val[0]
 word_dict = dict()
 word_data_vect = word_fh.read().strip().split("\n")
 for key_val in word_data_vect:
-  key_val = key_val.split(" ")
-  word_dict[key_val[1]] = key_val[0]
+    key_val = key_val.split(" ")
+    word_dict[key_val[1]] = key_val[0]
 unk_val = unk_fh.read().strip().split(" ")[0]
 
 utt_word_dict = dict()
-utt_phone_dict = dict()# stores utteranceID and phoneID
+utt_phone_dict = dict()  # stores utteranceID and phoneID
 unk_word_dict = dict()
-count=0
+count = 0
 for line in input_fh:
-  line_vect = line.strip().split("\t")
-  if len(line_vect) < 6:
-    print "IndexError"
-    print line_vect
-    continue
-  uttID = line_vect[0]
-  word = line_vect[4]
-  phones = line_vect[5]
-  if uttID in utt_word_dict.keys():
-    utt_word_dict[uttID][count] = word
-    utt_phone_dict[uttID][count] = phones
-  else:
-    count = 0
-    utt_word_dict[uttID] = dict()
-    utt_phone_dict[uttID] = dict()
-    utt_word_dict[uttID][count] = word
-    utt_phone_dict[uttID][count] = phones
-  if word == unk_val: # get character sequence for unk
-    phone_key_vect = phones.split(" ")
-    phone_val_vect = list()
-    for pkey in phone_key_vect:
-      phone_val_vect.append(phone_dict[pkey])
-    phone_2_word = list()
-    for phone_val in phone_val_vect:
-      phone_2_word.append(phone_val.split('_')[0])
-    phone_2_word = ''.join(phone_2_word)
-    utt_word_dict[uttID][count] = phone_2_word
-  else:
-    if word == '0':
-      word_val = ' '
+    line_vect = line.strip().split("\t")
+    if len(line_vect) < 6:  # malformed row: report on stderr so stdout output stays clean
+        print >> sys.stderr, "IndexError"
+        print >> sys.stderr, line_vect
+        continue
+    uttID = line_vect[0]
+    word = line_vect[4]
+    phones = line_vect[5]
+    if uttID in utt_word_dict:  # direct dict membership; .keys() builds a list per line
+        utt_word_dict[uttID][count] = word
+        utt_phone_dict[uttID][count] = phones
     else:
-      word_val = word_dict[word]
-    utt_word_dict[uttID][count] = word_val
-  count += 1
+        count = 0
+        utt_word_dict[uttID] = dict()
+        utt_phone_dict[uttID] = dict()
+        utt_word_dict[uttID][count] = word
+        utt_phone_dict[uttID][count] = phones
+    if word == unk_val:  # get character sequence for unk
+        phone_key_vect = phones.split(" ")
+        phone_val_vect = list()
+        for pkey in phone_key_vect:
+            phone_val_vect.append(phone_dict[pkey])  # phone ID -> phone symbol
+        phone_2_word = list()
+        for phone_val in phone_val_vect:
+            phone_2_word.append(phone_val.split('_')[0])  # keep the part before the first '_'
+        phone_2_word = ''.join(phone_2_word)
+        utt_word_dict[uttID][count] = phone_2_word  # overwrite the unk ID with the spelled-out word
+    else:
+        if word == '0':  # word ID '0' is written out as a single space
+            word_val = ' '
+        else:
+            word_val = word_dict[word]  # word ID -> word
+        utt_word_dict[uttID][count] = word_val
+    count += 1
 
 transcription = ""
 for key in sorted(utt_word_dict.iterkeys()):
-  transcription = key
-  for index in sorted(utt_word_dict[key].iterkeys()):
-    value = utt_word_dict[key][index]
-    transcription = transcription + " " + value
-  out_fh.write(transcription + '\n')
+    transcription = key
+    for index in sorted(utt_word_dict[key].iterkeys()):
+        value = utt_word_dict[key][index]
+        transcription = transcription + " " + value
+    out_fh.write(transcription + '\n')
diff --git a/egs/iam/s5/run.sh b/egs/iam/s5/run.sh
index d1eeda7e0d1..27b72ff1fff 100755
--- a/egs/iam/s5/run.sh
+++ b/egs/iam/s5/run.sh
@@ -5,37 +5,28 @@ nj=20
 color=1
 data_dir=data
 exp_dir=exp
-augment=false
+augment=true
+
 . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
            ## This relates to the queue.
 . utils/parse_options.sh  # e.g. this parses the --stage option if supplied.
 
 if [ $stage -le 0 ]; then
+  # data preparation
   local/prepare_data.sh --nj $nj --dir $data_dir
 fi
-mkdir -p $data_dir/{train,test}/data
 
 if [ $stage -le 1 ]; then
-  local/make_feature_vect.py $data_dir/test --scale-size 40 | \
-    copy-feats --compress=true --compression-method=7 \
-    ark:- ark,scp:$data_dir/test/data/images.ark,$data_dir/test/feats.scp || exit 1
-  steps/compute_cmvn_stats.sh $data_dir/test || exit 1;
-
-  if [ $augment = true ]; then
-    # create a backup directory to store text, utt2spk and image.scp file
-    mkdir -p $data_dir/train/backup
-    mv $data_dir/train/text $data_dir/train/utt2spk $data_dir/train/images.scp $data_dir/train/backup/
-    local/augment_and_make_feature_vect.py $data_dir/train --scale-size 40 --vertical-shift 10 | \
-      copy-feats --compress=true --compression-method=7 \
-      ark:- ark,scp:$data_dir/train/data/images.ark,$data_dir/train/feats.scp || exit 1
-    utils/utt2spk_to_spk2utt.pl $data_dir/train/utt2spk > $data_dir/train/spk2utt
-  else
-    local/make_feature_vect.py $data_dir/train --scale-size 40 | \
-      copy-feats --compress=true --compression-method=7 \
-      ark:- ark,scp:$data_dir/train/data/images.ark,$data_dir/train/feats.scp || exit 1
-  fi
-  steps/compute_cmvn_stats.sh $data_dir/train || exit 1;
+  for f in test; do
+    local/extract_feature.sh --nj $nj --cmd $cmd \
+     --scale_size 40 \
+     --augment $augment \
+     $data_dir/$f		 
+  
+    steps/compute_cmvn_stats.sh $data_dir/$f || exit 1;
+  done
 fi
+exit 0
 
 numSilStates=4
 numStates=8
@@ -149,8 +140,11 @@ if [ $stage -le 12 ]; then
     $exp_dir/tri3_ali
 fi
 
+
+affix=_aug
+nnet3_affix=_shear
+
 affix=_1a
-nnet3_affix=
 if [ $stage -le 13 ]; then
   local/chain/run_cnn_1a.sh --stage 0 \
    --gmm tri3 \

From b154f3b6713fd7ceabc4f6f54ac0502671da81fd Mon Sep 17 00:00:00 2001
From: YiwenShaoStephen <sywcs007wow@gmail.com>
Date: Wed, 1 Nov 2017 20:03:42 -0400
Subject: [PATCH 2/3] bug fixed in run.sh

---
 egs/iam/s5/local/make_feature_vect.py |  8 ++++----
 egs/iam/s5/local/prepare_lexicon.py   |  4 ++--
 egs/iam/s5/run.sh                     | 15 ++++++++-------
 3 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/egs/iam/s5/local/make_feature_vect.py b/egs/iam/s5/local/make_feature_vect.py
index 697f9d92b86..8d00b56e8a5 100755
--- a/egs/iam/s5/local/make_feature_vect.py
+++ b/egs/iam/s5/local/make_feature_vect.py
@@ -11,10 +11,10 @@
 from signal import signal, SIGPIPE, SIG_DFL
 signal(SIGPIPE, SIG_DFL)
 
-parser = argparse.ArgumentParser(
-    description="""Generates and saves the feature vectors""")
-parser.add_argument(
-    'dir', type=str, help='directory of images.scp and is also output directory')
+parser = argparse.ArgumentParser(description="""Generates and saves
+                                 the feature vectors""")
+parser.add_argument('dir', type=str,
+                    help='directory of images.scp and is also output directory')
 parser.add_argument('--job', type=str, default='',
                     help='JOB number of images.JOB.scp')
 parser.add_argument('--out-ark', type=str, default='-',
diff --git a/egs/iam/s5/local/prepare_lexicon.py b/egs/iam/s5/local/prepare_lexicon.py
index 6a31e635a18..5844f809e1d 100755
--- a/egs/iam/s5/local/prepare_lexicon.py
+++ b/egs/iam/s5/local/prepare_lexicon.py
@@ -4,8 +4,8 @@
 import os
 import sys
 
-parser = argparse.ArgumentParser(
-    description="""Creates the list of characters and wor                                 ds in lexicon""")
+parser = argparse.ArgumentParser(description="""Creates the list of characters
+                                 and words in lexicon""")
 parser.add_argument('database_path', type=str, help='path to train text file')
 parser.add_argument('test_text', type=str,
                     help='path to test text file to include it in lexicon')
diff --git a/egs/iam/s5/run.sh b/egs/iam/s5/run.sh
index 27b72ff1fff..661ef97517f 100755
--- a/egs/iam/s5/run.sh
+++ b/egs/iam/s5/run.sh
@@ -5,7 +5,7 @@ nj=20
 color=1
 data_dir=data
 exp_dir=exp
-augment=true
+augment=false
 
 . ./cmd.sh ## You'll want to change cmd.sh to something that will work on your system.
            ## This relates to the queue.
@@ -16,17 +16,18 @@ if [ $stage -le 0 ]; then
   local/prepare_data.sh --nj $nj --dir $data_dir
 fi
 
+
 if [ $stage -le 1 ]; then
-  for f in test; do
+  for f in train test; do
+    mkdir -p $data_dir/$f/data      
     local/extract_feature.sh --nj $nj --cmd $cmd \
-     --scale_size 40 \
-     --augment $augment \
+      --scale_size 40 \
+      --augment $augment \
      $data_dir/$f		 
   
     steps/compute_cmvn_stats.sh $data_dir/$f || exit 1;
   done
 fi
-exit 0
 
 numSilStates=4
 numStates=8
@@ -141,8 +142,8 @@ if [ $stage -le 12 ]; then
 fi
 
 
-affix=_aug
-nnet3_affix=_shear
+affix=_1a
+nnet3_affix=
 
 affix=_1a
 if [ $stage -le 13 ]; then

From 0f77a74150a7fa17bc92d50c1144123d2e9daa51 Mon Sep 17 00:00:00 2001
From: YiwenShaoStephen <sywcs007wow@gmail.com>
Date: Wed, 1 Nov 2017 20:14:23 -0400
Subject: [PATCH 3/3] fix bugs in make_feature_vect.py

---
 egs/iam/s5/local/make_feature_vect.py | 5 ++---
 egs/iam/s5/run.sh                     | 3 ---
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/egs/iam/s5/local/make_feature_vect.py b/egs/iam/s5/local/make_feature_vect.py
index 8d00b56e8a5..ecf4da02462 100755
--- a/egs/iam/s5/local/make_feature_vect.py
+++ b/egs/iam/s5/local/make_feature_vect.py
@@ -211,7 +211,6 @@ def image_augment(im, out_fh, image_id):
         # im_shift = vertical_shift(im, shift_setting[i])
         # data = np.transpose(im_shift, (1, 0))
         # data = np.divide(data, 255.0)
-        # new_scp_list.append(image_id + '_shift' + str(i + 1))
         # write_kaldi_matrix(out_fh, data, image_shift_id[i])
 
 
@@ -250,10 +249,10 @@ def image_augment(im, out_fh, image_id):
             image_path = line_vect[1]
             im = misc.imread(image_path)
             im_scaled = get_scaled_image(im)
-            im_contrast = contrast_normalization(im_scaled, 0.05, 0.2)
+            # im_contrast = contrast_normalization(im_scaled, 0.05, 0.2)
             # slant_degree = find_slant_project(im_contrast)
             # im_sheared = horizontal_shear(im_contrast, slant_degree)
             # im_padded = vertical_shift(im_scaled, 10)
-            data = np.transpose(im_contrast, (1, 0))
+            data = np.transpose(im_scaled, (1, 0))
             data = np.divide(data, 255.0)
             write_kaldi_matrix(out_fh, data, image_id)
diff --git a/egs/iam/s5/run.sh b/egs/iam/s5/run.sh
index 661ef97517f..6c94f732c86 100755
--- a/egs/iam/s5/run.sh
+++ b/egs/iam/s5/run.sh
@@ -141,11 +141,8 @@ if [ $stage -le 12 ]; then
     $exp_dir/tri3_ali
 fi
 
-
 affix=_1a
 nnet3_affix=
-
-affix=_1a
 if [ $stage -le 13 ]; then
   local/chain/run_cnn_1a.sh --stage 0 \
    --gmm tri3 \