diff --git a/lab3_hybrid/Descripcion_Imagenes b/lab3_hybrid/Descripcion_Imagenes new file mode 100644 index 0000000..17939e9 --- /dev/null +++ b/lab3_hybrid/Descripcion_Imagenes @@ -0,0 +1,3 @@ +La imagen 1 es de mi novio, la imagen original fue tomada en un Asado. La obtuve en Facebook. +La imagen 2 es mia y fue tomada en una fiesta, esta tambien fue tomada de Facebook. Se buscaron imagenes donde las caras estuvieran +aproximadamente en la misma orientación. Además se recortaron para considerar solamente la cara. diff --git a/lab3_hybrid/Imagen1.jpg b/lab3_hybrid/Imagen1.jpg new file mode 100644 index 0000000..26d52ee Binary files /dev/null and b/lab3_hybrid/Imagen1.jpg differ diff --git a/lab3_hybrid/Imagen1Original.jpg b/lab3_hybrid/Imagen1Original.jpg new file mode 100644 index 0000000..044e4d7 Binary files /dev/null and b/lab3_hybrid/Imagen1Original.jpg differ diff --git a/lab3_hybrid/Imagen2.jpg b/lab3_hybrid/Imagen2.jpg new file mode 100644 index 0000000..c7675fa Binary files /dev/null and b/lab3_hybrid/Imagen2.jpg differ diff --git a/lab3_hybrid/Imagen2Original.jpg b/lab3_hybrid/Imagen2Original.jpg new file mode 100644 index 0000000..1cd9ad8 Binary files /dev/null and b/lab3_hybrid/Imagen2Original.jpg differ diff --git a/lab3_hybrid/ImagenHibrida.jpg b/lab3_hybrid/ImagenHibrida.jpg new file mode 100644 index 0000000..a92fd6e Binary files /dev/null and b/lab3_hybrid/ImagenHibrida.jpg differ diff --git a/lab3_hybrid/Lab.m b/lab3_hybrid/Lab.m new file mode 100644 index 0000000..4bcaaac --- /dev/null +++ b/lab3_hybrid/Lab.m @@ -0,0 +1,64 @@ +function Lab +%%Laboratorio Imagenes Hibridas +close all +clear all +%Lectura de las imagenes +imagen1=imread('Imagen1.jpg'); +imagen2=imread('Imagen2.jpg'); + +tam_im1=size(imagen1); + +%Se cambia el tamaño de la Imagen 2 para que las dos imagenes queden del +%mismo tamaño +imagen2=imresize(imagen2,[tam_im1(1) tam_im1(2)]); + +%Creacion de Filtro Gaussiano para las imagenes +Gauss=fspecial('gaussian',17,15); + +%Filtro 
Pasa Bajas Imagen 1 +Low_Im1=imfilter(imagen1,Gauss); +%Filtro Pasa Altas Imagen 1 +High_Im1=imagen1-Low_Im1; + +%Filtro Pasa Bajas Imagen 2 +Low_Im2=imfilter(imagen2,Gauss); +%Filtro Pasa Altas Imagen 2 +High_Im2=imagen2-Low_Im2; + +%Creacion de la Imagen Hibrida +H=High_Im1+Low_Im2; + + +%Visualizacion de la imagen hibrida. (Función tomada de "Project 1: Image +%Filtering and Hybrid Image": url: +%http://cs.brown.edu/courses/cs143/proj1/. La funcion esta disponible en el codigo disponible +% en el .zip. Nombre de la Funcion vis_hybrid_image) +Result=vis_hybrid_image(H); +imshow(Result) +imwrite(Result,'Piramide.jpg') + +function output = vis_hybrid_image(hybrid_image) +%visualize a hybrid image by progressively downsampling the image and +%concatenating all of the images together. + +scales = 7; %how many downsampled versions to create +scale_factor = 0.8; %how much to downsample each time +padding = 5; %how many pixels to pad. + +original_height = size(hybrid_image,1); +num_colors = size(hybrid_image,3); %counting how many color channels the input has +output = hybrid_image; +cur_image = hybrid_image; + +for i = 2:scales + %add padding + output = cat(2, output, ones(original_height, padding, num_colors)); + + %dowsample image; + cur_image = imresize(cur_image, scale_factor, 'bilinear'); + %pad the top and append to the output + tmp = cat(1,ones(original_height - size(cur_image,1), size(cur_image,2), num_colors), cur_image); + output = cat(2, output, tmp); +end + +%code by James Hays \ No newline at end of file diff --git a/lab3_hybrid/Piramide.jpg b/lab3_hybrid/Piramide.jpg new file mode 100644 index 0000000..b1984ef Binary files /dev/null and b/lab3_hybrid/Piramide.jpg differ diff --git a/lab6_textons/Fierro_Reyes/Diccionario.m b/lab6_textons/Fierro_Reyes/Diccionario.m new file mode 100644 index 0000000..8520968 --- /dev/null +++ b/lab6_textons/Fierro_Reyes/Diccionario.m @@ -0,0 +1,33 @@ + +addpath('lib') +clear all;close all;clc; + +% create filter bank +[fb] 
= fbCreate; + +%Direccion='D:\Docs\Documents\UNIVERSIDAD\ULTIMO\Vision en Computador\Lab Textones\train'; +%Direccion=train; + +Imagenes=dir(fullfile(Direccion,'*.jpg')); +im=[]; + +%Concatenacion Imagenes +for i=1:30:length(Imagenes); + for j=0:2 + + im1=double(imread(fullfile(Direccion,Imagenes(i+j).name)))/255; + im = [im im1]; + disp(i+j); + end + +end + +%Numero de textones +k = 50; + +% % Creacion de diccionario de textones +[map,textons] = computeTextons(fbRun(fb,im),k); +save('Diccionario2.mat','map','textons'); + + + diff --git a/lab6_textons/Fierro_Reyes/Entrenamiento_NN.m b/lab6_textons/Fierro_Reyes/Entrenamiento_NN.m new file mode 100644 index 0000000..b91115f --- /dev/null +++ b/lab6_textons/Fierro_Reyes/Entrenamiento_NN.m @@ -0,0 +1,27 @@ +addpath('lib') +clear all;close all;clc; +tic +load 'Diccionario.mat' +% create filter bank +[fb] = fbCreate; +% Se crea la direccion en la que estan las imagenes +Direccion = 'textures/train'; +Imagenes=dir(fullfile(Direccion,'*.jpg')); +k = 50; +histrain=[]; +Nombre=cell(0); +% Se recorre el tama?o de las imagenes para asignarles los textones +for i=1:numel(Imagenes) + % se abre cada imagen +im2=double(imread(fullfile(Direccion,Imagenes(i).name)))/255; +% se asignan los textones +tmap = assignTextons(fbRun(fb,im2),textons'); +% Se obtienen los histogramas +histrain(i,:)=histc(tmap(:),1:k)/numel(tmap); +% se crean las etiquetas +Nombre{i}=Imagenes(i).name(2:3); +disp(i) +end +toc +% se guarda el entrenamiento +save('HistogramaTrain','histrain'); \ No newline at end of file diff --git a/lab6_textons/Fierro_Reyes/Fierro_Reyes.pdf b/lab6_textons/Fierro_Reyes/Fierro_Reyes.pdf new file mode 100644 index 0000000..36bb43c Binary files /dev/null and b/lab6_textons/Fierro_Reyes/Fierro_Reyes.pdf differ diff --git a/lab6_textons/Fierro_Reyes/Fierro_Reyes.tex b/lab6_textons/Fierro_Reyes/Fierro_Reyes.tex new file mode 100644 index 0000000..f099b3d --- /dev/null +++ b/lab6_textons/Fierro_Reyes/Fierro_Reyes.tex @@ -0,0 +1,199 @@ 
+\documentclass[10pt,twocolumn,letterpaper]{article} + +\usepackage{cvpr} +\usepackage{times} +\usepackage{epsfig} +\usepackage{graphicx} +\usepackage{subfig} +\usepackage{float} +\usepackage{caption} +\usepackage{subcaption} +\usepackage{amsmath} +\usepackage{amssymb} +\usepackage{upgreek} % para poner letras griegas sin cursiva +\usepackage{cancel} % para tachar +\usepackage{mathdots} % para el comando \iddots +\usepackage{mathrsfs} % para formato de letra +\usepackage{stackrel} % para el comando \stackbin +\usepackage{lscape} +\usepackage{adjustbox} +\usepackage[graphicx]{realboxes} +\usepackage{graphicx} +\usepackage{flushend} + + +% Include other packages here, before hyperref. + +% If you comment hyperref and then uncomment it, you should delete +% egpaper.aux before re-running latex. (Or just hit 'q' on the first latex +% run, let it finish, and you should be clear). +\usepackage[breaklinks=true,bookmarks=false]{hyperref} + +\cvprfinalcopy % *** Uncomment this line for the final submission + +\def\cvprPaperID{****} % *** Enter the CVPR Paper ID here +\def\httilde{\mbox{\tt\raisebox{-.5ex}{\symbol{126}}}} + +% Pages are numbered in submission mode, and unnumbered in camera-ready +%\ifcvprfinal\pagestyle{empty}\fi +\setcounter{page}{1} +\begin{document} + +%%%%%%%%% TITLE +\title{Textons and classifiers } + +\author{Carlos Andres Reyes Rivera\\ +Universidad de los Andes\\ +{\tt\small ca.reyes1787@uniandes.edu.co} +% For a paper whose authors are all at the same institution, +% omit the following lines up until the closing ``}''. +% Additional authors and addresses can be added with ``\and'', +% just like the second author. 
+% To save space, use either the email address or home page, not both +\and +Lina Maria Fierro Zambrano\\ +Universidad de los Andes\\ +{\tt\small lm.fierro1340@uniandes.edu.co} +} + +\maketitle +%\thispagestyle{empty} + +%%%%%%%%% ABSTRACT +\begin{abstract} + +The present lab is about classification methods and its used in a texture database from ponce group. The principal objective of this lab is to evaluate the classifiers k-nearest neighbour and random forest according to limitation, required time for training and results in the database. We find the better method was the Nearest neighbour with chi-square distance. + + +\end{abstract} + +\section{Database} + +The database for this lab comes from the page of the ponce group \cite{1}. It has 1000 images divided in 25 texture classes. Each class has 40 different images which has 640x480 pixels, it is in gray-scale and it is in JPG format. On this lab the database was divided in two parts 750 images of train and 250 images of test. The train part has 30 images of each texture class and the test has the other 10 images. + + +%%%%%%%%% BODY TEXT + + +\section{Representation} + +In present lab, the representation of image was based in textons. So, we first created a textons dictionary with three images per class, in other words 75 train images.Additionally we used 50 textons. +In the other hand, we use a filter bank, this containing Orient numbers even and odd-symmetric filters. The even-symmetric filter is a Gaussian second derivative and the odd-symmetric filter is its Hilbert transform. We try to use more images but it was not possible for computational resources. After this, assigned textons in all train images and create histograms of texton maps, this histograms was used to train classification models. + +\section{Classification} + + +\subsection{Nearest neighbour} + +Nearest neighbour is one of the most simple and fundamental classification methods. 
It was developed from the need to perform discriminant analysis when reliable parametric estimates of probability densities are unknown or difficult to determine\cite{2} . The main idea of the method is classify a new image in some category based on the distance to the training data. For this purpose, it fit a Voronoi diagram during the training stage. Then the method measures the distance between the Voronoi cells and the image and assigns a category for the image. This classifier is commonly based on the Euclidean distance, however, the classifier lets to use another distances. + +In this lab we represent the distance between the test images and the training with the chi-square +\begin{equation} +K\left ( H_{0}, H_{0'}\right )= \sum_{i=1}^{d}\frac{\left ( H_{0}\left ( i \right )- H_{0'}\left ( i \right )\right )^{2}}{ H_{0}\left ( i \right )+ H_{0'}\left ( i \right )} +\end{equation} +and the intersection kernel distance. +\begin{equation} +K\cap \left ( a,b \right )= \sum_{i=1}^{n} min\left ( a_{i}, b_{i} \right ) +\end{equation} + +In order to reach a better classification we used all of the train images to compare with the new data. For this purpose we obtained the histogram of the train images and the histogram of the new image. Then we used the chi-square and kernel intersection distance to find the image which has the closest histogram to the new data. Finally we assign the same label of the closest image to the new and repeat the same process for the next test image. + + +\subsection{Random Forest} + +Random forest is a classifier method proposed by Breiman at 1999.This method used a combination of tree predictors. Each tree depends on the values of a random vector and the same distribution in each tree \cite{3}. There are two random models: bagging and random node optimization. In the last model each node have random features. 
This method have some advantages like no-linear classifiers so more flexibility, the test stage is more efficient, and the over fitting problem we can resolve with randomization. But the disadvantage is the number of tuning parameters. \cite{4} + + +In this lab to implement this method we used all of the train images to compare with the new data. Next, we obtain histograms in the two subsets data and predicts new labels of test images. Finally we obtained the confusion matrix and statistics like recall and precision to see the results better. + + +\section{Results} + + +\subsection{Representation} + +We present some examples of textons map of train images and and their respective histograms. + +\begin{figure}[H] \centering \includegraphics[width=7cm]{images/Original3}\caption{Image 3, Class 1}\label{Comp}\end{figure} + +\begin{figure}[H] \centering \includegraphics[width=7cm]{images/Imagen3}\caption{Map textons and histogram image 3, Class 1}\label{Comp}\end{figure} + +The figure 1 is an original train image, this is class 1 (Bark1), figure 2 is map textons for this image and its histogram. In the other hand, figure 3 and 4 correspond to image 40 that it is class 2 (Bark2). + +\begin{figure}[H] \centering \includegraphics[width=7cm]{images/Original40}\caption{Image 40, Class 2}\label{Comp}\end{figure} + +\begin{figure}[H] \centering \includegraphics[width=7cm]{images/Imagen40}\caption{Map textons and histogram image 40, Class 2}\label{Comp}\end{figure} + + +It is evident that the originals images are very different but in map textons these are not evidence. Nevertheless the histograms show the differences that permit the classification. + + +\subsection{Nearest neighbour} + +To implement Nearest neighbour we tried to use the Matlab function fitcknn. However this function hasn't the chi square distance or the intersection kernel,so we decided to make our own function. First of all we made the dictionary, for this we use the first 3 images of each category. 
This process took at least six hour and we can took more image because we have a limit compute recourse.Then, we found the textons histograms with the function assigntextons and we assigned the label of the category which they belong. This concludes the training stage and we could continued with the next stage. In the training stage the elapsed time for the chi-square distance and the intersection kernel was 787.518294 seconds.\\ + +Once we had the histogram of each one of the training images we started with the test stage. As we made in the training, we use the assigntextons function and we obtained the histogram. To compare the histograms we first used the intersection kernel distances but this has many errors as we can see in the confusion matrix figure 7 and took 375.256802 seconds.Then we decided to probe the chi-square distances. This contrary to the other took 265.507637 seconds and obtain better results, at least find more that one category as we can see in the confusion matrix figure 6. + +\subsection{Random Forest} + + +To implement Random forest in the database we used the Matlab function Treebagger and our code is based in "kawahara.ca". So for this classifier, first we train model with all train images and each of one have the corresponding label. We used 20 trees because it can provide significant information but if the number of trees increase computing time; for this reason we choose this number. An example of trained trees can be observed in figure 5. + +\begin{figure}[H] \centering \includegraphics[width=8cm]{images/EjemploArbol1}\caption{Tree 1 Example }\label{Comp}\end{figure} + + +Once train model made, we continued with testing stage where we used function "histc" to obtain histograms to test images and with these new available data realized the predictions. After we compared the predictions with annotations to construct the confusion matrix with Matlab function "confusionmat". This result can be found in figure 8 of appendix. 
Additionally, the average precision of this method was 3.7\% and the recall was 4\%; it is evident that these statistics are very low. + +With the parameters mentioned above (number of trees, number of textons, number of images in the dictionary), this method took 2137.5874 seconds, including both the training stage and the test stage. + + +\section{Discussion of the results} + +Random forest and nearest neighbor are two of the most popular approaches to the classification problem. These methods have been used by many researchers around the world, but the results differ widely. Their performance depends strongly on the descriptors used for training. In this lab, we used textons as descriptors, but we realized that they may not be enough to separate the classes of the Ponce database. Additionally, we note that this database is too specific and is focused on similar textures. + +We also note that the textons are inefficient and require many computational resources. For this reason it is necessary to extract only a few descriptors; moreover, even an excellent descriptor may not be enough for the Ponce database. + +The dictionary is the part that took the most time. Since all the methods need a dictionary, a difference of a few seconds between them is irrelevant once this time is taken into account. On the other hand, if we leave the dictionary stage aside, the most efficient method is the nearest neighbor with the intersection kernel distance. + +We can see that the results are not the best because the statistics are very low, but from these results we can conclude that the best method to classify the images is the nearest neighbor with the chi-square distance, with a precision of 11\% and a recall of 12\%. The scores are low because all categories caused confusion, perhaps due to the complexity of the database or the texton dictionary obtained. + +\section{Improvements} +The biggest problem in this lab was the amount of computational resources needed to build the dictionary. 
One possible solution for this is to use another method, such as sketch tokens. Another problem we identified is the dependency of the classifier on the descriptor. Since the textons are the only descriptor, the classifier depends completely on them. However, if we used another descriptor, such as shape, or applied detection techniques to the classification, the results of the classifier could improve. + +\begin{thebibliography}{1} + +\bibitem{1} S. Lazebnik, C. Schmid, and Jean Ponce.\emph{ A Sparse Texture Representation Using Local Affine Regions}.\hskip 1em plus 0.5em minus 0.4em\relax IEEE Transactions on Pattern Analysis and Machine Intelligence, vol. 27, no. 8, pp. 1265-1278, August 2005. + +\bibitem{2}Leif E. Peterson \emph{ K-nearest neighbor}\hskip 1em plus 0.5em minus 0.4em\relax Scholarpedia, 4(2):1883. + +\bibitem{3} L.Breiman. \emph{Random Forests}.\hskip 1em plus 0.5em minus 0.4em\relax Statistics Department, University of California, Berkeley, 2001 + +\bibitem{4} P.Arbelaez. \emph{Lecture 08: Classification}.\hskip 1em plus 0.5em minus 0.4em\relax Computer Vision, Universidad de los Andes. 
+ +\end{thebibliography} + +\newpage + + +\ + + + +\title{Appendix} \centering + + +\begin{@twocolumnfalse} + + + +\begin{figure}[H] \includegraphics[width=17cm]{images/confusionchi}\caption{Confusion Matrix Nearest Neighbor with chi square distance }\label{Comp}\end{figure} + +\begin{figure}[H] \includegraphics[width=17cm]{images/confusionint}\caption{Confusion Matrix Nearest Neighbor with intersection kernel distance }\label{Comp}\end{figure} + +\begin{figure}[H] \includegraphics[width=17cm]{images/randomforest}\caption{Confusion Matrix Random Forest }\label{Comp}\end{figure} + + +\begin{@twocolumnfalse} +\end{@twocolumnfalse} + +\end{document} diff --git a/lab6_textons/Fierro_Reyes/MatrizConfusion.m b/lab6_textons/Fierro_Reyes/MatrizConfusion.m new file mode 100644 index 0000000..3bec3ae --- /dev/null +++ b/lab6_textons/Fierro_Reyes/MatrizConfusion.m @@ -0,0 +1,13 @@ +Test='D:\Docs\Documents\UNIVERSIDAD\ULTIMO\Vision_en_Computador\LabTextones\test'; +Imagenes_Test=dir(fullfile(Test,'*.jpg')); +load('ResultadosRandom.mat') +Anotaciones=[]; +for i=1:length(Imagenes_Test) + + Nombre=Imagenes_Test(i).name(2:3); + Anotaciones(i)=str2num(Nombre); +end + +Anotaciones=Anotaciones'; + +[C order]= confusionmat(Anotaciones,predictedClass) diff --git a/lab6_textons/Fierro_Reyes/RandomForest.m b/lab6_textons/Fierro_Reyes/RandomForest.m new file mode 100644 index 0000000..af27b6f --- /dev/null +++ b/lab6_textons/Fierro_Reyes/RandomForest.m @@ -0,0 +1,82 @@ + +addpath('lib') + +tic +Direccion='D:\Docs\Documents\UNIVERSIDAD\ULTIMO\Vision_en_Computador\LabTextones\train'; +Imagenes=dir(fullfile(Direccion,'*.jpg')); + +load('Diccionario2.mat') +tmap=cell(0); +Histogramas=cell(0); +[fb] = fbCreate; +k=50; +% Histograma de mapa de textones para imagenes de train +for i=1:numel(Imagenes), + + im2=double(imread(fullfile(Direccion,Imagenes(i).name)))/255; + tmap{i} = assignTextons(fbRun(fb,im2),textons'); + Histogramas {i}=histc(tmap{i}(:),1:k)/numel(tmap{i}); + disp(i) + +end + 
+%RANDOM FOREST +%Fuente Codigo: http://kawahara.ca/matlab-treebagger-example/ + +rng default + +%Creacion datos de entrenamiento +trainData=[]; +a=1; + for i=1:30:length(Histogramas)-29 + + for j=0:29 + Hist = [Histogramas{j+i}' a]; + trainData=[trainData;Hist]; + end + a=a+1; + end + +features = trainData(:,(1:50)); +classLabels = trainData(:,51); + +% Numero de Arboles +nTrees = 20; + +%Train the TreeBagger (Decision Forest). +B = TreeBagger(nTrees,features,classLabels, 'Method', 'classification'); + +%Etapa de Test +Test='D:\Docs\Documents\UNIVERSIDAD\ULTIMO\Vision_en_Computador\LabTextones\test'; +Imagenes_Test=dir(fullfile(Test,'*.jpg')); + +tmap_Test=cell(0); +Histogramas_Test=cell(0); +% Histograma de mapa de textones para imagenes de test +for i=1:numel(Imagenes_Test), + + im2=double(imread(fullfile(Test,Imagenes_Test(i).name)))/255; + tmap_Test{i} = assignTextons(fbRun(fb,im2),textons'); + Histogramas_Test {i}=histc(tmap_Test{i}(:),1:k)/numel(tmap_Test{i}); + disp(i) +end + +%Datos de test +newData=[]; +a=1; + for i=1:10:length(Histogramas_Test)-9 + + for j=0:9 + Hist_Test = [Histogramas_Test{j+i}']; + newData=[newData;Hist_Test]; + end + a=a+1; + end + +% %Predicciones de los nuevos datos +predChar1 = B.predict(newData); +% +% % Conversion de char a numero de las predicciones +predictedClass = str2double(predChar1) + +toc diff --git a/lab6_textons/Fierro_Reyes/Test_NN.m b/lab6_textons/Fierro_Reyes/Test_NN.m new file mode 100644 index 0000000..953bb74 --- /dev/null +++ b/lab6_textons/Fierro_Reyes/Test_NN.m @@ -0,0 +1,46 @@ +addpath('lib') +clear all;close all;clc; +tic +% se carga el diccionario y el entrenamiento +load 'Diccionario.mat' +load 'histrain.mat' +% create filter bank +[fb] = fbCreate; +% se crean las direcciones de test y train +Direccion = 'textures/train'; +Imagenes=dir(fullfile(Direccion,'*.jpg')); +k = 50; +DireccionT = 'textures/test'; +ImagenesT=dir(fullfile(DireccionT,'*.jpg')); +histest=[]; +Anotaciones=cell(0); +Predicciones=cell(0); 
+dist=[]; +% se crean las funciones de distancia chi cuadrado e interseccion de kernel +chi_2_distance = @(x,y)((bsxfun(@minus,x,y).^2)/(bsxfun(@plus,x,y))); +distance_kernel = @(x,Y) sum(bsxfun(@min,x,Y),2); +for i=1:numel(ImagenesT) + % se abren las imagenes +im2=double(imread(fullfile(DireccionT,ImagenesT(i).name)))/255; +% se asignan los textones a las imagenes de test +tmap = assignTextons(fbRun(fb,im2),textons'); +% se crean los histogramas +histest(i,:)=histc(tmap(:),1:k)/numel(tmap); +% se obtienen las anotaciones para cada imagen de test +Anotaciones{i}=ImagenesT(i).name(2:3); +% se compara con las distancias anteriormente obtenidas los histogramas del +% entrenamiento y del test +for j=1:numel(Imagenes) + %dist(j)=chi_2_distance(histest(i),histrain(j)); + dist(j)=distance_kernel(histest(i),histrain(j)); +end +% se encuentra la menor distancia +p=find(dist==min(dist)); +% Se le asigna la etiqueta de la imagen con la menor distancia +Predicciones{i}=Imagenes(p,1).name(2:3); +disp(i) +end +toc +% se contruye la matriz de confusion +[C order]= confusionmat(Anotaciones,Predicciones) +save('Confusion','C','order'); \ No newline at end of file diff --git a/lab8_cnn/Fierro_Reyes_Arquitectura.m b/lab8_cnn/Fierro_Reyes_Arquitectura.m new file mode 100644 index 0000000..708e0a7 --- /dev/null +++ b/lab8_cnn/Fierro_Reyes_Arquitectura.m @@ -0,0 +1,35 @@ +function net = Fierro_Reyes_Arquitectura() + +f=1/100 ; +net.layers = {} ; +%Size Input: 128x128x1 +net.layers{end+1} = struct('type', 'conv', ... + 'filters', f*randn(15,15,1,25, 'single'), ... + 'biases', zeros(1, 25, 'single'), ... + 'stride', 2, ... + 'pad', 0) ; + +%Size Input: 57x57x25 +net.layers{end+1} = struct('type', 'pool', ... + 'method', 'max', ... + 'pool', [4 4], ... + 'stride', 3, ... + 'pad', 0); +%Size Input: 18x18x25 +net.layers{end+1} = struct('type', 'conv', ... + 'filters', f*randn(7,7,25,25, 'single'), ... + 'biases', zeros(1, 25, 'single'), ... + 'stride', 3, ... 
+ 'pad', 0) ; +%Size Input: 4x4x25 +net.layers{end+1} = struct('type', 'pool', ... + 'method', 'max', ... + 'pool', [4 4], ... + 'stride', 1, ... + 'pad', 0) ; + +%Size Input: 1x1x25 +net.layers{end+1} = struct('type', 'relu') ; +net.layers{end+1} = struct('type', 'softmaxloss') ; + + diff --git a/lab8_cnn/Fierro_Reyes_Train.m b/lab8_cnn/Fierro_Reyes_Train.m new file mode 100644 index 0000000..453a789 --- /dev/null +++ b/lab8_cnn/Fierro_Reyes_Train.m @@ -0,0 +1,123 @@ +function Fierron_Reyes_Train(varargin) +% EXERCISE4 Part 4 of the VGG CNN practical + +setup ; + +% ------------------------------------------------------------------------- +% Part 4.1: prepare the data +% ------------------------------------------------------------------------- + +% Load character dataset +imdb=load('textonsdb.mat') ; +imdb.images.data=im2single(imdb.images.data); + + +% ------------------------------------------------------------------------- +% Part 4.2: initialize a CNN architecture +% ------------------------------------------------------------------------- + +net = Fierro_Reyes_Arquitectura(); + +% ------------------------------------------------------------------------- +% Part 4.3: train and evaluate the CNN +% ------------------------------------------------------------------------- + +trainOpts.batchSize = 25 ; +trainOpts.numEpochs = 3; +trainOpts.continue = true ; +trainOpts.useGpu = false ; +trainOpts.learningRate = 0.01 ; +trainOpts.expDir = 'CNNFierroReyes_Final' ; +trainOpts = vl_argparse(trainOpts, varargin); + +% Take the average image out + +imdb.images.id= imdb.images.id(1:18750); +imdb.images.label= imdb.images.label(1:18750); +imdb.images.set= imdb.images.set(1:18750); +imdb.images.data= imdb.images.data(:,:,1:18750); +imageMean = mean(imdb.images.data(:)) ; +imdb.images.data = imdb.images.data - imageMean ; + +% Convert to a GPU array if needed +if trainOpts.useGpu + imdb.images.data = gpuArray(imdb.images.data) ; +end + +% Call training function in MatConvNet 
+[net,info] = cnn_train(net, imdb, @getBatch, trainOpts) ; + +% Move the CNN back to the CPU if it was trained on the GPU +if trainOpts.useGpu + net = vl_simplenn_move(net, 'cpu') ; +end + +% Save the result for later use +net.layers(end) = [] ; +net.imageMean = imageMean ; +save('CNNFierroReyes_Final/Fierro_Reyes_CNN.mat', '-struct', 'net') ; + +% % ------------------------------------------------------------------------- +% % Part 4.6: train with jitter +% % ------------------------------------------------------------------------- + +trainOpts.batchSize = 25 ; +trainOpts.numEpochs = 3 ; +trainOpts.continue = true ; +trainOpts.useGpu = false ; +trainOpts.learningRate = 0.01 ; +trainOpts.expDir = 'CNNFierroReyes_Final/WithJitt' ; + +% % Initlialize a new network +net = Fierro_Reyes_Arquitectura() ; + +% Call training function in MatConvNet +[net,info] = cnn_train(net, imdb, @getBatchWithJitter, trainOpts) ; + +% Move the CNN back to CPU if it was trained on GPU +if trainOpts.useGpu + net = vl_simplenn_move(net, 'cpu') ; +end + +% % Save the result for later use +net.layers(end) = [] ; +net.imageMean = imageMean ; +save('CNNFierroReyes_Final/WithJitt/Fierro_Reyes_jit.mat', '-struct', 'net') ; + + +% -------------------------------------------------------------------- +function [im, labels] = getBatch(imdb, batch) +% -------------------------------------------------------------------- +im = imdb.images.data(:,:,batch) ; +im = 256 * reshape(im, 128, 128, 1, []) ; +labels = imdb.images.label(1,batch) ; + +% -------------------------------------------------------------------- +function [im, labels] = getBatchWithJitter(imdb, batch) +% -------------------------------------------------------------------- +im = imdb.images.data(:,:,batch) ; +labels = imdb.images.label(1,batch) ; + +n = numel(batch) ; +train = find(imdb.images.set == 1) ; + +sel = randperm(numel(train), n) ; +im1 = imdb.images.data(:,:,sel) ; + +sel = randperm(numel(train), n) ; +im2 = 
imdb.images.data(:,:,sel) ; + +ctx = [im1 im2] ; +ctx(:,17:48,:) = min(ctx(:,17:48,:), im) ; + +dx = randi(11) - 6 ; +im = ctx(:,(17:48)+dx,:) ; +sx = (17:48) + dx ; + +dy = randi(5) - 2 ; +sy = max(1, min(128, (1:128) + dy)) ; + +im = ctx(sy,sx,:) ; + +im = 256 * reshape(im, 128, 128, 1, []) ; + diff --git a/lab8_cnn/RedesNeuronales.md b/lab8_cnn/RedesNeuronales.md new file mode 100644 index 0000000..6eea667 --- /dev/null +++ b/lab8_cnn/RedesNeuronales.md @@ -0,0 +1,10 @@ + +## Explication +The net is made of 4 layers, the first and the third one are convolutional layers and the second and fourth one are pool layer. For the convolutional layer we decided to use a 15 x 15 kernel and a 7 x 7 kernel respectively. This decision was taken because we realized the textures aren't discriminative between them in small scales. In other words a small kernel doesn't work in this problem. In the other hand, in the "pooling" stage, we decided take a maximum in [ 4 4 ] and [4 4] windows. This values was chosen in order to take a significative region, again in small scales the textures aren't differents. + +For the final net, we tried 4 different configurations but the most of these taken a very much time and the error was not significative difference with the final net. First of all we tried a 14 layers with some variation in the number of epoch, then we tried to reduce the number of layer and we made a 8 layers net but as we said these take a very much time and te results are not significative better so we decides to reduce again the net and made the final 6 layer net (2 convolutional 2 pooling, one of relu and one of softmaxloss). + +## Results +To train the neural network we use a batch size of 25 and 3 epoch, this means that in each epoch there was 750 batches. The time that took to train the proposed Neural Networks was 60 minutos. In average each batch lasted 1 seconds, this means that the net process from 15 to 20 image for second. 
So the time of each epoch was 30 minutes. Regarding the results, on the training set we had an average error of 80% in the first epoch. We trained the network, but we realized that the net has a problem: when the number of images was greater than 700 the filters were not working. For this reason we could not test our neural network. + + diff --git a/lab8_cnn/train_net.m b/lab8_cnn/train_net.m new file mode 100644 index 0000000..a2f151a --- /dev/null +++ b/lab8_cnn/train_net.m @@ -0,0 +1,21 @@ +function res=train_net(net,test_data) + +setup ; + +for i=1:size(test_data,3) +net = load('CNNFierroReyes\Fierro_Reyes_CNN.mat') ; + +im = test_data(:,:,i) ; +im_ = im2single(im) ; +im_ = 256 * (im_ - net.imageMean) ; +im_ = imresize(im_, [128 128]) ; + +res = vl_simplenn(net, im_) ; + +scores = squeeze(gather(res(end).x)); +[bestScore, best] = max(scores); + +categ(i)=best +end + +res=categ; \ No newline at end of file