VisionLearningGroup · likelyzhao · Dec 20, 2017 · Jan 29, 2018
diff --git a/caffe3d/Makefile.config b/caffe3d/Makefile.config
@@ -18,7 +18,7 @@ USE_CUDNN := 1
 # ALLOW_LMDB_NOLOCK := 1
 
 # Uncomment if you're using OpenCV 3
-# OPENCV_VERSION := 3
+OPENCV_VERSION := 3
 
 # To customize your choice of compiler, uncomment and set the following.
 # N.B. the default for Linux is g++ and the default for OSX is clang++
@@ -90,8 +90,8 @@ PYTHON_LIB := /usr/lib
 WITH_PYTHON_LAYER := 1
 
 # Whatever else you find you need goes here.
-INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include $(CUDNN_PATH)/include
-LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib $(CUDNN_PATH)/lib64
+INCLUDE_DIRS := $(PYTHON_INCLUDE) /usr/local/include $(CUDNN_PATH)/include /usr/include/hdf5/serial/
+LIBRARY_DIRS := $(PYTHON_LIB) /usr/local/lib /usr/lib $(CUDNN_PATH)/lib64 /usr/lib/x86_64-linux-gnu/hdf5/serial/
 
 # If Homebrew is installed at a non standard location (for example your home directory) and you use it for general dependencies
 # INCLUDE_DIRS += $(shell brew --prefix)/include

diff --git a/caffe3d/include/caffe/blob.hpp b/caffe3d/include/caffe/blob.hpp
@@ -137,10 +137,10 @@ class Blob {
   /// @brief Deprecated legacy shape accessor width: use shape(3) instead.
   inline int width() const { return LegacyShape(3); }
   inline int LegacyShape(int index) const {
-    CHECK_LE(num_axes(), 4)
+    CHECK_LE(num_axes(), 5)
         << "Cannot use legacy accessors on Blobs with > 4 axes.";
-    CHECK_LT(index, 4);
-    CHECK_GE(index, -4);
+    CHECK_LT(index, 5);
+    CHECK_GE(index, -5);
     if (index >= num_axes() || index < -num_axes()) {
       // Axis is out of range, but still in [0, 3] (or [-4, -1] for reverse
       // indexing) -- this special case simulates the one-padding used to fill

diff --git a/caffe3d/include/caffe/util/cudnn.hpp b/caffe3d/include/caffe/util/cudnn.hpp
@@ -158,8 +158,14 @@ template <typename Dtype>
 inline void setConvolutionDesc(cudnnConvolutionDescriptor_t* conv,
     cudnnTensorDescriptor_t bottom, cudnnFilterDescriptor_t filter,
     int pad_h, int pad_w, int stride_h, int stride_w) {
+#if CUDNN_VERSION_MIN(6, 0, 0)
   CUDNN_CHECK(cudnnSetConvolution2dDescriptor(*conv,
+      pad_h, pad_w, stride_h, stride_w, 1, 1, CUDNN_CROSS_CORRELATION,
+      dataType<Dtype>::type));
+#else
+    CUDNN_CHECK(cudnnSetConvolution2dDescriptor(*conv,
       pad_h, pad_w, stride_h, stride_w, 1, 1, CUDNN_CROSS_CORRELATION));
+#endif
 }
 
 template <typename Dtype>

diff --git a/experiments/activitynet/script_train.sh b/experiments/activitynet/script_train.sh
@@ -2,7 +2,7 @@
 
 export PYTHONUNBUFFERED=true
 
-GPU_ID=1
+GPU_ID=0
 EX_DIR=activitynet
 
 LOG="experiments/${EX_DIR}/log.txt.`date +'%Y-%m-%d_%H-%M-%S'`"

diff --git a/lib/tdcnn/train.py b/lib/tdcnn/train.py
@@ -10,6 +10,7 @@
 import caffe
 from tdcnn.config import cfg
 import roi_data_layer.roidb as rdl_roidb
+import google.protobuf.text_format
 from utils.timer import Timer
 import numpy as np
 import os
@@ -114,7 +115,7 @@ def train_net(solver_prototxt, roidb, output_dir,
               pretrained_model=None, max_iters=40000):
     """Train a R-C3D network."""
 
-    roidb = filter_roidb(roidb)
+    # roidb = filter_roidb(roidb)
     sw = SolverWrapper(solver_prototxt, roidb, output_dir,
                        pretrained_model=pretrained_model)
 

diff --git a/preprocess/activityNet/Activity.json b/preprocess/activityNet/Activity.json
diff --git a/preprocess/activityNet/download_ava_video.py b/preprocess/activityNet/download_ava_video.py
@@ -0,0 +1,37 @@
+# --------------------------------------------------------
+# R-C3D
+# Copyright (c) 2017 Boston University
+# Licensed under The MIT License [see LICENSE for details]
+# Written by Huijuan Xu
+# --------------------------------------------------------
+
+"""Download the videos listed in Activity.json into videos/ using curl.
+
+Activity.json is read as one JSON object per line; each object must carry
+a 'url' key.
+"""
+
+import json
+import os
+import subprocess
+
+# Every file is fetched from this fixed endpoint; the first '/'-separated
+# component of the record's url is sent as the HTTP Host header instead.
+ENDPOINT = 'http://xsio.qiniu.io/'
+
+with open('Activity.json') as annotation_file:
+    for line in annotation_file:
+        if not line.strip():
+            # Tolerate blank lines (e.g. a trailing newline at end of file).
+            continue
+        down = json.loads(line)
+        url = down['url']
+        parts = url.split('/')
+        filename = parts[-1]
+        host = parts[0]
+        # Argument list instead of a shell string: text coming from the
+        # annotation file can no longer be interpreted by the shell.
+        command3 = ['curl', ENDPOINT + filename, '-H', 'Host:' + host,
+                    '-o', os.path.join('videos', filename)]
+        print(' '.join(command3))
+        subprocess.call(command3)
diff --git a/preprocess/activityNet/generate_frames.py b/preprocess/activityNet/generate_frames.py
@@ -20,7 +20,7 @@
 mkdir('./frames')
 def generate_frame(split):
   mkdir('./frames/%s' % split)
-  for vid, vinfo in meta_data['database'].iteritems():
+  for vid, vinfo in meta_data['database'].items():
     if vinfo['subset'] == split:
       vname = [s for s in video_list if vid in s]
       if len(vname) != 0 :
@@ -34,7 +34,7 @@ def generate_frame(split):
         for framename in os.listdir(outpath):
           resize(outpath + framename)
         frame_size = len(os.listdir(outpath))
-        print filename, duration, fps, frame_size
+        # print filename, duration, fps, frame_size
 
 generate_frame('training')
 generate_frame('validation')

diff --git a/preprocess/activityNet/generate_roidb_training.py b/preprocess/activityNet/generate_roidb_training.py
@@ -2,7 +2,7 @@
 # R-C3D
 # Copyright (c) 2017 Boston University
 # Licensed under The MIT License [see LICENSE for details]
-# Written by Huijuan Xu
+# Written by Huijuan Xu
 # --------------------------------------------------------
 
 import os
@@ -23,10 +23,10 @@
 META_FILE = './activity_net.v1-3.min.json'
 data = json.load(open(META_FILE))
 
-print 'Generate Classes'
+print('Generate Classes')
 classes = generate_classes(data)
 
-print 'Generate Training Segments'
+print('Generate Training Segments')
 train_segment = generate_segment('training', data, classes)
 
 path = './preprocess/activityNet/frames/'
@@ -42,7 +42,7 @@ def generate_roi(rois, video, start, end, stride, split):
   tmp['bg_name'] = path + split + '/' + video
   tmp['fg_name'] = path + split + '/' + video
   if not os.path.isfile('../../' + tmp['bg_name'] + '/image_' + str(end-1).zfill(5) + '.jpg'):
-    print '../../' + tmp['bg_name'] + '/image_' + str(end-1).zfill(5) + '.jpg'
+    print('../../' + tmp['bg_name'] + '/image_' + str(end-1).zfill(5) + '.jpg')
     raise
   return tmp
 
@@ -128,6 +128,6 @@ def generate_roidb(split, segment):
 USE_FLIPPED = True      
 train_roidb = generate_roidb('training', train_segment)
 
-print "Save dictionary"
+print("Save dictionary")
 cPickle.dump(train_roidb, open('train_data_3fps_flipped.pkl','w'), cPickle.HIGHEST_PROTOCOL)
 
diff --git a/preprocess/activityNet/generate_roidb_validation.py b/preprocess/activityNet/generate_roidb_validation.py
@@ -44,7 +44,9 @@ def generate_roidb(split):
   for i,vid in enumerate(video_list):
     print i
     length = len(os.listdir('./frames/' + split + '/' + vid))
-
+    if length ==0:
+       print('length =0 in' + str(vid))
+       continue
     for win in WINS:
       stride = win / LENGTH
       step = stride * STEP

diff --git a/preprocess/activityNet/util.py b/preprocess/activityNet/util.py
@@ -12,7 +12,7 @@
 
 def generate_classes(data):
   class_list = []
-  for vid, vinfo in data['database'].iteritems():
+  for vid, vinfo in data['database'].items():
     for item in vinfo['annotations']:
       class_list.append(item['label'])
 
@@ -28,7 +28,7 @@ def generate_segment(split, data, classes):
   VIDEO_PATH = 'frames/%s/' % split
   video_list = set(os.listdir(VIDEO_PATH))
   # get time windows based on video key
-  for vid, vinfo in data['database'].iteritems():
+  for vid, vinfo in data['database'].items():
     vid_name = [v for v in video_list if vid in v]
     if len(vid_name) == 1:
       if vinfo['subset'] == split: