diff --git a/README.md b/README.md
index cad1abd..c527842 100644
--- a/README.md
+++ b/README.md
@@ -1,20 +1,117 @@
 CUDA Rasterizer
-===============
-
-[CLICK ME FOR INSTRUCTION OF THIS PROJECT](./INSTRUCTION.md)
+================
 
 **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 4**
 
-* (TODO) YOUR NAME HERE
-* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab)
+* Xincheng Zhang
+* Tested on:
+  * Windows 10, i7-4702HQ @ 2.20GHz 8GB, GTX 870M 3072MB (Personal Laptop)
+
+.
+### Description & Features
+-------------
+**In this project, I use CUDA to implement a simplified rasterized graphics pipeline, similar to the OpenGL pipeline. Specific content and features are listed below.**
+
+Basic part: Rasterizer pipeline
+* Vertex shading
+* Primitive assembly with support for triangles read from buffers of index and vertex data
+* Rasterization
+* Fragment shading
+* A depth buffer for storing and depth testing fragments
+* Fragment-to-depth-buffer writing (with atomics for race avoidance)
+* (Fragment shader) simple lighting scheme, Lambert and Blinn-Phong.
+
+Extra part:
+* Texture with perspective correctness and bilinear interpolation
+* Point and line primitive implementation 
+* Optimization for point & line primitives by sampling fragments around the edges and corners within each bounding box
+* (work in progress) SSAO & tile-based pipeline
+
+
+.
+### Result in Progress
+-------------
+**Result Screenshot**
+
+![](https://github.com/XinCastle/Project4-CUDA-Rasterizer/blob/master/renders/Overall%20sample1.gif)
+
+![](https://github.com/XinCastle/Project4-CUDA-Rasterizer/blob/master/renders/Overall%20Sample2.gif)
+* The duck and CesiumMilkTruck object with texture
+* w/ perspective correctness & bilinear interpolation
+* Blinn-Phong and Lambert shading are both implemented
+
+.
+
+**Perspective Correct texture coordinate**
+* Original checkerboard texture:
+
+![](https://github.com/XinCastle/Project4-CUDA-Rasterizer/blob/master/renders/Without%20Perspective%20correction.gif) 
+
+* with perspective correctness:
+
+![](https://github.com/XinCastle/Project4-CUDA-Rasterizer/blob/master/renders/With%20perspective%20correction.gif)
+* In the original (uncorrected) version, the checkerboard texture only appears correct at a viewing angle of 45 degrees and looks twisted when the object is rotated. Perspective-correct interpolation solves this problem using the method below (reference: https://en.wikipedia.org/wiki/Texture_mapping).
+```
+#if PERSPECTIVE
+    	glm::vec3 zbuffer(1.f / triEyePos[0].z, 1.f / triEyePos[1].z, 1.f / triEyePos[2].z);
+    	glm::vec3 bottom(barycoord.x * zbuffer[0], barycoord.y * zbuffer[1], barycoord.z * zbuffer[2]);
+    	float sumbot = bottom.x + bottom.y + bottom.z;
+    	float denominator_inv = 1.f / sumbot;		
+    	glm::vec2 numerator = bottom.x * triTexcoord0[0] + bottom.y * triTexcoord0[1] + bottom.z * triTexcoord0[2];	
+    	dev_fragmentBuffer[pixelid].texcoord0 = numerator * denominator_inv;
+#else
+		dev_fragmentBuffer[pixelid].texcoord0 = texB;
+#endif
+```
+
+.
+
+**Bilinear Interpolation**
+
+* Original Checkerboard  
+
+![](https://github.com/XinCastle/Project4-CUDA-Rasterizer/blob/master/renders/No%20Bilinear%20Interpolation.png)
+   
+
+*  w/ Bilinear Interpolation
+
+![](https://github.com/XinCastle/Project4-CUDA-Rasterizer/blob/master/renders/Has%20Bilinear%20Interpolation.png)
+
+* The mosaic (blocky) appearance of the original checkerboard is removed by applying the bilinear interpolation method below (reference: https://en.wikipedia.org/wiki/Bilinear_interpolation).
+```
+__device__
+glm::vec3 bilinearInterpolation(float a, float b, glm::vec3 txy, glm::vec3 txplus1, glm::vec3 typuls1, glm::vec3 txyplus1) 
+{
+	glm::vec3 temp = (1.f - a) * txy + a * txplus1;
+	glm::vec3 temp1 = (1.f - a) * typuls1 + a * txyplus1;
+	return temp * (1.f - b) + temp1 * b;
+}
+```
+
+.
+
+
+**Points and lines Primitives**
+* Points Primitives
+
+![](https://github.com/XinCastle/Project4-CUDA-Rasterizer/blob/master/renders/Point%20example.gif)
+
+* Lines Primitives
 
-### (TODO: Your README)
+![](https://github.com/XinCastle/Project4-CUDA-Rasterizer/blob/master/renders/line%20example.gif)
 
-*DO NOT* leave the README to the last minute! It is a crucial part of the
-project, and we will not be able to grade you without a good README.
+.
 
+**SSAO**
+* Still in progress; will be updated later.
 
-### Credits
+.
 
-* [tinygltfloader](https://github.com/syoyo/tinygltfloader) by [@soyoyo](https://github.com/syoyo)
-* [glTF Sample Models](https://github.com/KhronosGroup/glTF/blob/master/sampleModels/README.md)
+### Point and Line Primitives Optimization
+-------------
+* Simply checking whether a point is inside the bounding box is a problem for the point primitive implementation: for many points the point itself is inside the bounding box but its fragment falls outside it. In that case, the naive approach loses many points that should be drawn.
+* My approach: instead of checking whether the point is inside the box, I check whether the point is near the bounding box by applying a tolerance (coefficient) in the barycentric test, so that more points show up in the result. The same idea is used for lines (part of the line primitive condition is shown below).
+```
+	if (glm::abs(1 - glm::abs(barycentricCoord.x + barycentricCoord.y)) <= 0.07f && glm::abs(barycentricCoord.z) <= 0.07f) return true;
+```
+As in the code above, the same check is repeated for the other two conditions to detect line primitives. Again, by changing the threshold (0.07 in this example), we can change the thickness of the lines drawn on the screen.
\ No newline at end of file
diff --git a/renders/Has Bilinear Interpolation.png b/renders/Has Bilinear Interpolation.png
new file mode 100644
index 0000000..149d9bc
Binary files /dev/null and b/renders/Has Bilinear Interpolation.png differ
diff --git a/renders/No Bilinear Interpolation.png b/renders/No Bilinear Interpolation.png
new file mode 100644
index 0000000..05f1ad7
Binary files /dev/null and b/renders/No Bilinear Interpolation.png differ
diff --git a/renders/Overall Sample2.gif b/renders/Overall Sample2.gif
new file mode 100644
index 0000000..f879125
Binary files /dev/null and b/renders/Overall Sample2.gif differ
diff --git a/renders/Overall sample1.gif b/renders/Overall sample1.gif
new file mode 100644
index 0000000..0d15cb1
Binary files /dev/null and b/renders/Overall sample1.gif differ
diff --git a/renders/Point example.gif b/renders/Point example.gif
new file mode 100644
index 0000000..cb72499
Binary files /dev/null and b/renders/Point example.gif differ
diff --git a/renders/With perspective correction.gif b/renders/With perspective correction.gif
new file mode 100644
index 0000000..31df7c0
Binary files /dev/null and b/renders/With perspective correction.gif differ
diff --git a/renders/Without Perspective correction.gif b/renders/Without Perspective correction.gif
new file mode 100644
index 0000000..67d998d
Binary files /dev/null and b/renders/Without Perspective correction.gif differ
diff --git a/renders/line example.gif b/renders/line example.gif
new file mode 100644
index 0000000..76e3906
Binary files /dev/null and b/renders/line example.gif differ
diff --git a/src/rasterize.cu b/src/rasterize.cu
index 1262a09..b77cf83 100644
--- a/src/rasterize.cu
+++ b/src/rasterize.cu
@@ -6,6 +6,7 @@
  * @copyright University of Pennsylvania & STUDENT
  */
 
+#include <iostream>
 #include <cmath>
 #include <cstdio>
 #include <cuda.h>
@@ -18,6 +19,16 @@
 #include <glm/gtc/quaternion.hpp>
 #include <glm/gtc/matrix_transform.hpp>
 
+
+#define BLINN 1 // render(): compute Blinn-Phong lighting; tested first, so it takes precedence over LAMBERT
+#define LAMBERT 0 // render(): Lambert-only lighting, used when BLINN is 0. NOTE(review): that branch assigns to `resullt` (typo) and will not compile
+#define PERSPECTIVE 1 // _rasterization(): perspective-correct texture coordinates instead of plain barycentric interpolation
+#define BILINEAR 1 // render(): blend four neighbouring texels instead of reading a single texel
+#define POINTSHADING 0 // _rasterization(): keep only fragments near triangle corners; only used when SOLID is 0
+#define LINESHADING 0 // _rasterization(): keep only fragments near triangle edges; only used when SOLID and POINTSHADING are 0
+#define SOLID 1 // _rasterization(): fill whole triangles; tested first
+
+
 namespace {
 
 	typedef unsigned short VertexIndex;
@@ -43,16 +54,19 @@ namespace {
 
 		 glm::vec3 eyePos;	// eye space position used for shading
 		 glm::vec3 eyeNor;	// eye space normal used for shading, cuz normal will go wrong after perspective transformation
-		// glm::vec3 col;
+		 glm::vec3 col;
 		 glm::vec2 texcoord0;
 		 TextureData* dev_diffuseTex = NULL;
-		// int texWidth, texHeight;
+		 int texWidth, texHeight;
 		// ...
 	};
 
 	struct Primitive {
 		PrimitiveType primitiveType = Triangle;	// C++ 11 init
 		VertexOut v[3];
+		TextureData* tex = NULL;	// diffuse texture pointer, copied from primitive.dev_diffuseTex in _primitiveAssembly
+		int diffuseTexWidth;	// texture width, copied from primitive.diffuseTexWidth in _primitiveAssembly
+		int diffuseTexHeight;	// texture height, copied from primitive.diffuseTexHeight in _primitiveAssembly
 	};
 
 	struct Fragment {
@@ -62,10 +76,12 @@ namespace {
 		// The attributes listed below might be useful, 
 		// but always feel free to modify on your own
 
-		// glm::vec3 eyePos;	// eye space position used for shading
-		// glm::vec3 eyeNor;
-		// VertexAttributeTexcoord texcoord0;
-		// TextureData* dev_diffuseTex;
+		 glm::vec3 eyePos;	// eye space position used for shading
+		 glm::vec3 eyeNor;
+		 VertexAttributeTexcoord texcoord0;
+		 TextureData* dev_diffuseTex = NULL;
+		 int diffuseTexWidth;
+		 int diffuseTexHeight;
 		// ...
 	};
 
@@ -133,6 +149,52 @@ void sendImageToPBO(uchar4 *pbo, int w, int h, glm::vec3 *image) {
     }
 }
 
+
+#define COL(C) ((C) / 255.0f)	// maps a 0-255 channel value to a float; argument parenthesized, float literal avoids double math
+#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))	// NOTE: evaluates its arguments twice; pass side-effect-free expressions only
+#define MAX(X, Y) (((X) > (Y)) ? (X) : (Y))	// NOTE: evaluates its arguments twice; pass side-effect-free expressions only
+
+
+
+
+// Bilinear blend of four colors: `a` weights txy->txplus1 and typuls1->txyplus1, `b` weights between those two results.
+__device__ glm::vec3 bilinearInterpolation(float a, float b, glm::vec3 txy, glm::vec3 txplus1, glm::vec3 typuls1, glm::vec3 txyplus1) {
+	const float oneMinusA = 1.f - a;
+	const glm::vec3 blendY0 = oneMinusA * txy + a * txplus1;
+	const glm::vec3 blendY1 = oneMinusA * typuls1 + a * txyplus1;
+	return blendY0 * (1.f - b) + blendY1 * b;
+}
+
+
+
+// Returns the color of texel (u, v) from a texture laid out as 3 consecutive TextureData entries per texel
+// and `stride` texels per row, each channel scaled by COL. No bounds checking is done here.
+__host__ __device__
+glm::vec3 getTexColor(TextureData* tex, int stride, int u, int v)
+{
+	const TextureData* texel = tex + (u + v * stride) * 3;
+	return glm::vec3(COL(texel[0]), COL(texel[1]), COL(texel[2]));
+}
+
+
+// Limits v to the range [a, b]: raised to a first, then capped at b (so b wins if a > b).
+__host__ __device__ int clamp(int v, int a, int b) {
+	const int lowerBounded = (a > v) ? a : v;
+	return (lowerBounded < b) ? lowerBounded : b;
+}
+
+
+
+// Linear interpolation between a and b: a is weighted by (1 - v), b by v.
+template<class T>
+__host__ __device__ T lerp(float v, T a, T b) {
+	const float weightA = 1.0f - v;
+	return a * weightA + v * b;
+}
+
+
+
+
 /** 
 * Writes fragment colors to the framebuffer
 */
@@ -146,7 +208,96 @@ void render(int w, int h, Fragment *fragmentBuffer, glm::vec3 *framebuffer) {
         framebuffer[index] = fragmentBuffer[index].color;
 
 		// TODO: add your fragment shader code here
-
+		Fragment &tmpfrag = fragmentBuffer[index];
+		glm::vec3 tmpcol = tmpfrag.color;
+		glm::vec3 result(0.f, 0.f, 0.f);
+		glm::vec3 tmppos = tmpfrag.eyePos;
+		glm::vec3 tmpnor = tmpfrag.eyeNor;
+
+
+		glm::vec3 lightpos(60.f, 60.f, 60.f);
+		glm::vec3 lightdir = glm::normalize((lightpos - tmppos));
+		glm::vec3 specColor(0.f, 0.f, 0.f);
+		float ambient = 0.1;
+		float shininess = 32.0f;
+		float lambertian = glm::max(glm::dot(lightdir, tmpnor), 0.0f);
+		float specular = 0.0f;
+#if BLINN
+		glm::vec3 viewDir = glm::normalize(-tmppos);
+		glm::vec3 halfDir = normalize(lightdir + viewDir);
+		float specAngle = glm::max(glm::dot(halfDir, tmpnor), 0.0f);
+		specular = glm::pow(specAngle, shininess);
+		result = lambertian * tmpcol + ambient * glm::vec3(.9f, .9f, .9f) + specular * glm::vec3(1.f, 1.f, 1.f);
+#elif LAMBERT
+		resullt = lambertian * tmpcol + ambient * glm::vec3(.9f, .9f, .9f);
+#endif
+		//framebuffer[index] = result;
+
+
+		float texwidth = fragmentBuffer[index].diffuseTexWidth;
+		float texheight = fragmentBuffer[index].diffuseTexHeight; 
+
+		//Here, the TextureData* is char, which is 8 bit (2 to the power of 8) representing 0-255 color number.
+		//in this char, every 3 chars are used to express a single fragment's RGB values.
+		//therefore, when calculating uvindex, we need to multiply 3.
+		//moreover, when calculating the color value, we need to divide 255 to get its color value.
+		TextureData* tmptex = fragmentBuffer[index].dev_diffuseTex;
+		float ufloat = fragmentBuffer[index].texcoord0.x * texwidth;
+		float vfloat = fragmentBuffer[index].texcoord0.y * texheight;
+		//int u = fragmentBuffer[index].texcoord0.x * texwidth;
+		//int v = fragmentBuffer[index].texcoord0.y * texheight;
+		int u = clamp((int)ufloat, 0, texwidth - 1);
+		int v = clamp((int)vfloat, 0, texheight - 1);
+
+
+#if BILINEAR
+		//if (tmptex != NULL)
+		//{
+			//int pxy = 3 * (u + v * texwidth);
+			//int pxplus1 = 3 * (u + 1 + v * texwidth);
+			//int pyplus1 = 3 * (u + (v + 1) * texwidth);
+			//int pxyplus1 = 3 * (u + 1 + (v + 1) * texwidth);
+
+			//glm::vec3 texturexy(tmptex[pxy] / 255.f, tmptex[pxy + 1] / 255.f, tmptex[pxy + 2] / 255.f);
+			//glm::vec3 texturexplus1(tmptex[pxplus1] / 255.f, tmptex[pxplus1 + 1] / 255.f, tmptex[pxplus1 + 2] / 255.f);
+			//glm::vec3 textureyplus1(tmptex[pyplus1] / 255.f, tmptex[pyplus1 + 1] / 255.f, tmptex[pyplus1 + 2] / 255.f);
+			//glm::vec3 texturexyplus1(tmptex[pxyplus1] / 255.f, tmptex[pxyplus1 + 1] / 255.f, tmptex[pxyplus1 + 2] / 255.f);
+
+			//result = bilinearInterpolation((float)(ufloat - u), (float)(vfloat - v), texturexy, texturexplus1, textureyplus1, texturexyplus1);
+		//}
+		//else 
+		//{
+		//	printf("DAMN\n");
+		//	result = tmpfrag.color;
+		//}
+		if (tmptex != NULL)
+		{
+			float du = ufloat - u;
+			float dv = vfloat - v;
+			auto x0y0 = getTexColor(tmptex, texwidth, u + 0, v + 0);
+			auto x1y0 = getTexColor(tmptex, texwidth, u + 1, v + 0);
+			auto x0y1 = getTexColor(tmptex, texwidth, u + 0, v + 1);
+			auto x1y1 = getTexColor(tmptex, texwidth, u + 1, v + 1);
+			result = lerp<glm::vec3>(dv, lerp<glm::vec3>(du, x0y0, x1y0), lerp<glm::vec3>(du, x0y1, x1y1));
+		}
+		else
+		{
+			result = tmpfrag.color;
+		}
+#else
+		//printf("%i %i\n", u, v);
+		int uvindex = 3 * (u + v * texwidth);
+		//result = glm::vec3(tmptex[uvindex+0] / 255.f, tmptex[uvindex+1] / 255.f, tmptex[uvindex + 2] / 255.f);
+		if (tmptex != NULL)
+		{
+			result = glm::vec3(tmptex[uvindex] / 255.f, tmptex[uvindex + 1] / 255.f, tmptex[uvindex + 2] / 255.f);
+		}
+		else
+		{
+			result = tmpfrag.color;
+		}
+#endif
+		framebuffer[index] = result;
     }
 }
 
@@ -525,10 +676,12 @@ void rasterizeSetBuffers(const tinygltf::Scene & scene) {
 					int diffuseTexHeight = 0;
 					if (!primitive.material.empty()) {
 						const tinygltf::Material &mat = scene.materials.at(primitive.material);
-						printf("material.name = %s\n", mat.name.c_str());
+						//printf("material.name = %s\n", mat.name.c_str());
 
 						if (mat.values.find("diffuse") != mat.values.end()) {
+
 							std::string diffuseTexName = mat.values.at("diffuse").string_value;
+							
 							if (scene.textures.find(diffuseTexName) != scene.textures.end()) {
 								const tinygltf::Texture &tex = scene.textures.at(diffuseTexName);
 								if (scene.images.find(tex.source) != scene.images.end()) {
@@ -537,10 +690,10 @@ void rasterizeSetBuffers(const tinygltf::Scene & scene) {
 									size_t s = image.image.size() * sizeof(TextureData);
 									cudaMalloc(&dev_diffuseTex, s);
 									cudaMemcpy(dev_diffuseTex, &image.image.at(0), s, cudaMemcpyHostToDevice);
-									
+
 									diffuseTexWidth = image.width;
 									diffuseTexHeight = image.height;
-
+									//printf("%i\n", diffuseTexWidth);
 									checkCUDAError("Set Texture Image data");
 								}
 							}
@@ -554,7 +707,7 @@ void rasterizeSetBuffers(const tinygltf::Scene & scene) {
 
 					// ---------Node hierarchy transform--------
 					cudaDeviceSynchronize();
-					
+
 					dim3 numBlocksNodeTransform((numVertices + numThreadsPerBlock.x - 1) / numThreadsPerBlock.x);
 					_nodeMatrixTransform << <numBlocksNodeTransform, numThreadsPerBlock >> > (
 						numVertices,
@@ -595,21 +748,21 @@ void rasterizeSetBuffers(const tinygltf::Scene & scene) {
 		} // for each node
 
 	}
-	
+
 
 	// 3. Malloc for dev_primitives
 	{
 		cudaMalloc(&dev_primitives, totalNumPrimitives * sizeof(Primitive));
 	}
-	
+
 
 	// Finally, cudaFree raw dev_bufferViews
 	{
 
 		std::map<std::string, BufferByte*>::const_iterator it(bufferViewDevPointers.begin());
 		std::map<std::string, BufferByte*>::const_iterator itEnd(bufferViewDevPointers.end());
-			
-			//bufferViewDevPointers
+
+		//bufferViewDevPointers
 
 		for (; it != itEnd; it++) {
 			cudaFree(it->second);
@@ -638,10 +791,31 @@ void _vertexTransformAndAssembly(
 		// Multiply the MVP matrix for each vertex position, this will transform everything into clipping space
 		// Then divide the pos by its w element to transform into NDC space
 		// Finally transform x and y to viewport space
+		
+		//first get the position of the vertex
+		glm::vec4 vp = glm::vec4(primitive.dev_position[vid],1.0f);
+		//then multiply mvp matrix
+		glm::vec4 vpedit = MVP * vp;
+		//divide w
+		vpedit = vpedit / vpedit.w;
+		//viewport
+		vpedit.x = 0.5f * (float)width * (1.0f - vpedit.x);
+		vpedit.y = 0.5f * (float)height * (1.0f - vpedit.y);
 
 		// TODO: Apply vertex assembly here
 		// Assemble all attribute arraies into the primitive array
-		
+		primitive.dev_verticesOut[vid].pos = vpedit;
+		primitive.dev_verticesOut[vid].eyePos = multiplyMV(MV,vp);
+		primitive.dev_verticesOut[vid].eyeNor = glm::normalize(MV_normal * primitive.dev_normal[vid]);
+		primitive.dev_verticesOut[vid].texHeight = primitive.diffuseTexHeight;
+		primitive.dev_verticesOut[vid].texWidth = primitive.diffuseTexWidth;
+		primitive.dev_verticesOut[vid].dev_diffuseTex = primitive.dev_diffuseTex;
+		if (primitive.dev_texcoord0 != NULL)
+		{
+			primitive.dev_verticesOut[vid].texcoord0 = primitive.dev_texcoord0[vid];
+		}
+
+		primitive.dev_verticesOut[vid].col = glm::vec3(0.8, 0.8, 0.8);
 	}
 }
 
@@ -660,12 +834,16 @@ void _primitiveAssembly(int numIndices, int curPrimitiveBeginId, Primitive* dev_
 		// TODO: uncomment the following code for a start
 		// This is primitive assembly for triangles
 
-		//int pid;	// id for cur primitives vector
-		//if (primitive.primitiveMode == TINYGLTF_MODE_TRIANGLES) {
-		//	pid = iid / (int)primitive.primitiveType;
-		//	dev_primitives[pid + curPrimitiveBeginId].v[iid % (int)primitive.primitiveType]
-		//		= primitive.dev_verticesOut[primitive.dev_indices[iid]];
-		//}
+		int pid;	// id for cur primitives vector
+		if (primitive.primitiveMode == TINYGLTF_MODE_TRIANGLES)
+		{
+			pid = iid / (int)primitive.primitiveType;
+			dev_primitives[pid + curPrimitiveBeginId].v[iid % (int)primitive.primitiveType]
+				= primitive.dev_verticesOut[primitive.dev_indices[iid]];
+			dev_primitives[pid + curPrimitiveBeginId].tex = primitive.dev_diffuseTex;
+			dev_primitives[pid + curPrimitiveBeginId].diffuseTexWidth = primitive.diffuseTexWidth;
+			dev_primitives[pid + curPrimitiveBeginId].diffuseTexHeight = primitive.diffuseTexHeight;
+		}
 
 
 		// TODO: other primitive types (point, line)
@@ -675,6 +853,172 @@ void _primitiveAssembly(int numIndices, int curPrimitiveBeginId, Primitive* dev_
 
 
 
+/**
+ * Rasterizes the assembled primitives, one primitive per thread.
+ *
+ * Each thread scans the pixels of its primitive's screen-space bounding box (clamped to the viewport),
+ * depth-tests the selected pixels against dev_depth with atomicMin and writes the interpolated
+ * attributes of the winning fragment into dev_fragmentBuffer. SOLID fills the triangle; POINTSHADING
+ * and LINESHADING keep only fragments near its corners / edges and give them a fixed color.
+ *
+ * @param totalNumPrimitives  number of entries of dev_primitives to rasterize
+ * @param dev_primitives      assembled primitives (three vertex outputs plus diffuse texture pointer and size)
+ * @param dev_fragmentBuffer  per-pixel fragment storage, indexed as x + y * width
+ * @param dev_depth           per-pixel integer depth buffer, indexed as x + y * width
+ * @param width               viewport width in pixels
+ * @param height              viewport height in pixels
+ *
+ * NOTE(review): atomicMin and the following read of dev_depth are separate steps, so primitives that
+ * overlap on a pixel can still interleave their fragment writes; a per-pixel lock would be needed to
+ * make the depth update and the fragment write a single atomic unit.
+ */
+__global__ void _rasterization(int totalNumPrimitives, Primitive *dev_primitives, Fragment *dev_fragmentBuffer, int *dev_depth, int width, int height)
+{
+	int pid = (blockIdx.x * blockDim.x) + threadIdx.x;
+	if (pid < totalNumPrimitives)
+	{
+		// Local copies of the three vertices' positions and of the attributes that get interpolated.
+		glm::vec3 tri[3] = { glm::vec3(dev_primitives[pid].v[0].pos), glm::vec3(dev_primitives[pid].v[1].pos), glm::vec3(dev_primitives[pid].v[2].pos) };
+		glm::vec3 triEyePos[3] = { dev_primitives[pid].v[0].eyePos, dev_primitives[pid].v[1].eyePos, dev_primitives[pid].v[2].eyePos };
+		glm::vec3 triEyeNor[3] = { dev_primitives[pid].v[0].eyeNor, dev_primitives[pid].v[1].eyeNor, dev_primitives[pid].v[2].eyeNor };
+		glm::vec2 triTexcoord0[3] = { dev_primitives[pid].v[0].texcoord0, dev_primitives[pid].v[1].texcoord0, dev_primitives[pid].v[2].texcoord0 };
+
+		// Bounding box of this primitive, clamped to the viewport; all four bounds are inclusive.
+		AABB aabb = getAABBForTriangle(tri);
+		int xmin = glm::max((int)aabb.min.x, 0);
+		int xmax = glm::min((int)aabb.max.x, width - 1);
+		int ymin = glm::max((int)aabb.min.y, 0);
+		int ymax = glm::min((int)aabb.max.y, height - 1);
+
+		// Per-pixel scratch values reused by the loops below.
+		int depth;
+		glm::vec3 barycoord;
+		int pixelid;
+
+#if SOLID
+		// Filled triangles: visit every pixel of the bounding box and keep those inside the triangle.
+		for (int x = xmin; x <= xmax; x++)
+		{
+			for (int y = ymin; y <= ymax; y++)
+			{
+				// Barycentric coordinates of pixel (x, y) with respect to the triangle.
+				glm::vec2 tmpfrag(x, y);
+				barycoord = calculateBarycentricCoordinate(tri, tmpfrag);
+				if (isBarycentricCoordInBounds(barycoord))
+				{
+					// Index of this pixel in the depth and fragment buffers.
+					pixelid = x + y * width;
+
+					// Scale the interpolated z by INT_MIN so depths can be compared with the integer atomicMin.
+					depth = getZAtCoordinate(barycoord, tri) * INT_MIN;
+					atomicMin(&dev_depth[pixelid], depth);
+
+					// Only a fragment whose depth is the one currently stored for the pixel writes its attributes.
+					if (dev_depth[pixelid] == depth)
+					{
+						// Attributes are interpolated with the screen-space barycentric weights.
+						glm::vec3 eyeposB = barycoord.x * triEyePos[0] + barycoord.y * triEyePos[1] + barycoord.z * triEyePos[2];
+						dev_fragmentBuffer[pixelid].eyePos = eyeposB;
+						glm::vec3 eyenorB = glm::normalize(barycoord.x * triEyeNor[0] + barycoord.y * triEyeNor[1] + barycoord.z * triEyeNor[2]);
+						dev_fragmentBuffer[pixelid].eyeNor = eyenorB;
+						dev_fragmentBuffer[pixelid].dev_diffuseTex = dev_primitives[pid].tex;
+						dev_fragmentBuffer[pixelid].diffuseTexHeight = dev_primitives[pid].diffuseTexHeight;
+						dev_fragmentBuffer[pixelid].diffuseTexWidth = dev_primitives[pid].diffuseTexWidth;
+						dev_fragmentBuffer[pixelid].color = barycoord.x * dev_primitives[pid].v[0].col + barycoord.y * dev_primitives[pid].v[1].col + barycoord.z * dev_primitives[pid].v[2].col;
+
+#if PERSPECTIVE
+						// Perspective-correct texcoords: weight each vertex by barycentric weight / eye-space z, then renormalize.
+						glm::vec3 zbuffer(1.f / triEyePos[0].z, 1.f / triEyePos[1].z, 1.f / triEyePos[2].z);
+						glm::vec3 bottom(barycoord.x * zbuffer[0], barycoord.y * zbuffer[1], barycoord.z * zbuffer[2]);
+						float sumbot = bottom.x + bottom.y + bottom.z;
+						float denominator_inv = 1.f / sumbot;
+						glm::vec2 numerator = bottom.x * triTexcoord0[0] + bottom.y * triTexcoord0[1] + bottom.z * triTexcoord0[2];
+						dev_fragmentBuffer[pixelid].texcoord0 = numerator * denominator_inv;
+#else
+						dev_fragmentBuffer[pixelid].texcoord0 = barycoord.x * triTexcoord0[0] + barycoord.y * triTexcoord0[1] + barycoord.z * triTexcoord0[2];
+#endif
+					}
+				}
+			}
+		}
+#elif POINTSHADING
+		// Points: keep only fragments close to a triangle corner. The y loop includes ymax, as in SOLID.
+		for (int x = xmin; x <= xmax; x++)
+		{
+			for (int y = ymin; y <= ymax; y++)
+			{
+				// Barycentric coordinates of pixel (x, y) with respect to the triangle.
+				glm::vec2 tmpfrag(x, y);
+				barycoord = calculateBarycentricCoordinate(tri, tmpfrag);
+				if (isBarycentricCoordOnCorner(barycoord))
+				{
+					// Index of this pixel in the depth and fragment buffers.
+					pixelid = x + y * width;
+
+					// Scale the interpolated z by INT_MIN so depths can be compared with the integer atomicMin.
+					depth = getZAtCoordinate(barycoord, tri) * INT_MIN;
+					atomicMin(&dev_depth[pixelid], depth);
+
+					// Only a fragment whose depth is the one currently stored for the pixel writes its attributes.
+					if (dev_depth[pixelid] == depth)
+					{
+						// Point fragments get a fixed color instead of an interpolated one.
+						dev_fragmentBuffer[pixelid].color = glm::vec3(0.1, 1, 1);
+						glm::vec3 eyeposB = barycoord.x * triEyePos[0] + barycoord.y * triEyePos[1] + barycoord.z * triEyePos[2];
+						dev_fragmentBuffer[pixelid].eyePos = eyeposB;
+						glm::vec3 eyenorB = glm::normalize(barycoord.x * triEyeNor[0] + barycoord.y * triEyeNor[1] + barycoord.z * triEyeNor[2]);
+						dev_fragmentBuffer[pixelid].eyeNor = eyenorB;
+						// A NULL texture makes render() fall back to the fragment color.
+						dev_fragmentBuffer[pixelid].dev_diffuseTex = NULL;
+					}
+				}
+			}
+		}
+#elif LINESHADING
+		// Lines: keep only fragments close to a triangle edge. The y loop includes ymax, as in SOLID.
+		for (int x = xmin; x <= xmax; x++)
+		{
+			for (int y = ymin; y <= ymax; y++)
+			{
+				// Barycentric coordinates of pixel (x, y) with respect to the triangle.
+				glm::vec2 tmpfrag(x, y);
+				barycoord = calculateBarycentricCoordinate(tri, tmpfrag);
+				if (isBarycentricCoordOnBounds(barycoord))
+				{
+					// Index of this pixel in the depth and fragment buffers.
+					pixelid = x + y * width;
+
+					// Scale the interpolated z by INT_MIN so depths can be compared with the integer atomicMin.
+					depth = getZAtCoordinate(barycoord, tri) * INT_MIN;
+					atomicMin(&dev_depth[pixelid], depth);
+
+					// Only a fragment whose depth is the one currently stored for the pixel writes its attributes.
+					if (dev_depth[pixelid] == depth)
+					{
+						// Line fragments get a fixed color instead of an interpolated one.
+						dev_fragmentBuffer[pixelid].color = glm::vec3(0.1, 1, 1);
+						glm::vec3 eyeposB = barycoord.x * triEyePos[0] + barycoord.y * triEyePos[1] + barycoord.z * triEyePos[2];
+						dev_fragmentBuffer[pixelid].eyePos = eyeposB;
+						glm::vec3 eyenorB = glm::normalize(barycoord.x * triEyeNor[0] + barycoord.y * triEyeNor[1] + barycoord.z * triEyeNor[2]);
+						dev_fragmentBuffer[pixelid].eyeNor = eyenorB;
+						// A NULL texture makes render() fall back to the fragment color.
+						dev_fragmentBuffer[pixelid].dev_diffuseTex = NULL;
+					}
+				}
+			}
+		}
+#endif
+
+	}
+}
+
+
+
+
+
+
+
+
 /**
  * Perform rasterization.
  */
@@ -706,7 +1050,7 @@ void rasterize(uchar4 *pbo, const glm::mat4 & MVP, const glm::mat4 & MV, const g
 				checkCUDAError("Vertex Processing");
 				cudaDeviceSynchronize();
 				_primitiveAssembly << < numBlocksForIndices, numThreadsPerBlock >> >
-					(p->numIndices, 
+					(p->numIndices,
 					curPrimitiveBeginId, 
 					dev_primitives, 
 					*p);
@@ -723,8 +1067,10 @@ void rasterize(uchar4 *pbo, const glm::mat4 & MVP, const glm::mat4 & MV, const g
 	initDepth << <blockCount2d, blockSize2d >> >(width, height, dev_depth);
 	
 	// TODO: rasterize
-
-
+	dim3 numThreadsPerBlock(128);
+	dim3 numBlocksForPrimitives((totalNumPrimitives + numThreadsPerBlock.x - 1) / numThreadsPerBlock.x);
+	_rasterization <<<numBlocksForPrimitives, numThreadsPerBlock>>>(totalNumPrimitives, dev_primitives, dev_fragmentBuffer, dev_depth, width, height);
+	checkCUDAError("_rasterization");
 
     // Copy depthbuffer colors into framebuffer
 	render << <blockCount2d, blockSize2d >> >(width, height, dev_fragmentBuffer, dev_framebuffer);
diff --git a/src/rasterizeTools.h b/src/rasterizeTools.h
index 46c701e..cba4638 100644
--- a/src/rasterizeTools.h
+++ b/src/rasterizeTools.h
@@ -88,6 +88,27 @@ bool isBarycentricCoordInBounds(const glm::vec3 barycentricCoord) {
            barycentricCoord.z >= 0.0 && barycentricCoord.z <= 1.0;
 }
 
+
+/** True when one component is within `tolerance` of 0 and the other two sum to within `tolerance` of +/-1; `tolerance` sets the drawn line thickness. */
+__host__ __device__ static
+bool isBarycentricCoordOnBounds(const glm::vec3 barycentricCoord, const float tolerance = 0.07f) {
+	return (glm::abs(1 - glm::abs(barycentricCoord.x + barycentricCoord.y)) <= tolerance && glm::abs(barycentricCoord.z) <= tolerance)
+		|| (glm::abs(1 - glm::abs(barycentricCoord.y + barycentricCoord.z)) <= tolerance && glm::abs(barycentricCoord.x) <= tolerance)
+		|| (glm::abs(1 - glm::abs(barycentricCoord.z + barycentricCoord.x)) <= tolerance && glm::abs(barycentricCoord.y) <= tolerance);
+}
+
+/** True when one component's magnitude is within `tolerance` of 1 and the other two are within `tolerance` of 0 (all comparisons strict, single precision). */
+__host__ __device__ static
+bool isBarycentricCoordOnCorner(const glm::vec3 barycentricCoord, const float tolerance = 0.11f) {
+	return (glm::abs(1 - glm::abs(barycentricCoord.x)) < tolerance && glm::abs(barycentricCoord.y) < tolerance && glm::abs(barycentricCoord.z) < tolerance)
+		|| (glm::abs(1 - glm::abs(barycentricCoord.y)) < tolerance && glm::abs(barycentricCoord.x) < tolerance && glm::abs(barycentricCoord.z) < tolerance)
+		|| (glm::abs(1 - glm::abs(barycentricCoord.z)) < tolerance && glm::abs(barycentricCoord.y) < tolerance && glm::abs(barycentricCoord.x) < tolerance);
+}
+
+
+
+
+
 // CHECKITOUT
 /**
  * For a given barycentric coordinate, compute the corresponding z position