-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathannoformat.rkt
More file actions
135 lines (105 loc) · 3.69 KB
/
annoformat.rkt
File metadata and controls
135 lines (105 loc) · 3.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#lang racket
(require sxml)
(require srfi/13)
(define test-file "test.annotations.xml")
(define (get-sxml filename)
(ssax:xml->sxml (open-input-file filename) '()))
(define find-annotations
(sxpath "/document/annotations/annotation"))
(define (first-log-data xml)
(if (not (null? (find-annotations xml)))
(sxml:attr (car (find-annotations xml))
'log_data)
""))
(define url-key "&a-v=")
(define (find-video-url logstr)
(if (string-contains logstr url-key)
(let* ((start (+ 5 (string-contains logstr url-key)))
(end (+ start 11)))
(string-append "https://youtu.be/" (substring logstr start end)))
""))
(define (get-url xml)
(find-video-url (first-log-data xml)))
(define find-text
(sxpath "/document/annotations/annotation/TEXT"))
(define (break-text text-list)
(map (lambda (txt)
(string-append (sxml:text txt) "\n\n"))
text-list))
;; testing
(define (print-text text-list)
(map (lambda (txt)
(display (string-append (sxml:text txt) "\n\n")))
text-list))
(define find-time
(sxpath "/document/annotations/annotation/segment/movingRegion/rectRegion"))
(define (every-other li)
(let ((c 0))
(filter (lambda (x)
(begin
(set! c (+ c 1))
(odd? c)))
li)))
(define (extract-t rect-region)
(second (fifth (second rect-region))))
(define (break-time xml)
(map (lambda (rr)
(string-append (extract-t rr) "\n"))
(every-other (find-time xml))))
;; zips lists of unequal length
(define (interleave list1 list2)
(cond ((empty? list1) list2)
((empty? list2) list1)
(else
(append
(list (car list1) (car list2))
(interleave (cdr list1) (cdr list2))))))
;; zips formatted timecodes and text
(define (combine-time-text xml)
(interleave (break-time xml)
(break-text (find-text xml))))
(define (display-time-text ttl out)
(map (lambda (tt)
(display tt out))
ttl))
;;; I/O
(define input-dir (build-path (find-system-path 'orig-dir)
"CIR Annotations\\"))
(define output-dir "CIR Annotations Reformatted\\")
(define (in-out in-path out-path)
(call-with-input-file in-path
(lambda (input)
(call-with-output-file out-path
(lambda (output)
(let ((sxml (ssax:xml->sxml input '())))
(begin
(display (string-append (get-url sxml) "\n\n")
output)
(display-time-text (combine-time-text sxml)
output))))
#:mode 'text
#:exists 'replace))
#:mode 'text))
;; takes input path and generates output path
(define (transform-path in-path)
(let-values ([(base filename drop) (split-path in-path)])
(let-values ([(parent olddir drop) (split-path base)])
(build-path parent output-dir (path-replace-extension filename ".txt")))))
(define (do-files directory proc)
(map proc
(directory-list directory
#:build? #t)))
(define (reformat-all)
(do-files input-dir
(lambda (in-path)
(in-out in-path (transform-path in-path)))))
;;; Some tests on general annotation structure
(define (text-count in-file)
(length (find-text (ssax:xml->sxml in-file '()))))
(define (time-count in-file)
(length (find-time (ssax:xml->sxml in-file '()))))
(define (text-per-timestamp xml)
(= (length (find-text xml))
(* 2 (length (find-time xml)))))
(define (count-text-per-timestamp in-file)
(text-per-timestamp (ssax:xml->sxml in-file '())))