TextingAnalyzer/extract.py at master · jamedero/TextingAnalyzer · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/env python3

import csv
import sys
import re
import os
import argparse
from Texting import Dictionary, Conversation, Utterance, Normalizer
from FeatureExtractors import *

"""
Load a conversation and one or more Annotators, then process each line
in the conversation. Command-line options determine which features
are output for the conversation.
"""

def main(args):
    """Build the requested annotators and process every input file.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``norm``, ``survey``, ``activedays``, ``countwords``,
            ``countpos``, ``dict``, ``responsetimes``, ``timeofday``,
            ``allfeatures``, ``textfiles``, ``time`` and ``out``.
    """
    thisNorm = Normalizer(args.norm)

    # Annotators are appended in a fixed order; ``--allfeatures`` switches on
    # every extractor that does not need an external file (the survey and
    # dictionary extractors are only added when their file is supplied).
    annotators = []
    if args.survey:
        annotators.append(CSVFeatures(args.survey))
    if args.activedays or args.allfeatures:
        annotators.append(ActiveDays())
    if args.countwords or args.allfeatures:
        annotators.append(CountWords())
    if args.countpos or args.allfeatures:
        annotators.append(CountPOS())
    if args.dict:
        thisDict = Dictionary(args.dict)
        annotators.append(DictionaryFeatureExtractor(thisDict))
    if args.responsetimes or args.allfeatures:
        annotators.append(ElapsedTime())
    if args.timeofday or args.allfeatures:
        annotators.append(TimeOfDay())

    need_header = True
    for csvFile in args.textfiles:
        wrote = processFile(csvFile, args.time, thisNorm, annotators,
                            need_header, out=args.out)
        # Only stop requesting a header once a file has actually been
        # written; a skipped (e.g. empty) first conversation must not
        # suppress the header for the files that follow.
        if wrote:
            need_header = False


def processFile(fname, numDays, thisNorm, annotators, need_header,
                out="all_features.csv"):
    """Annotate one conversation file and write its features.

    Args:
        fname: Path of the conversation CSV file to load.
        numDays: Value passed to ``Conversation.limitTimeRange``.
        thisNorm: Normalizer handed to the ``Conversation`` constructor.
        annotators: Annotators to attach to the conversation.
        need_header: Forwarded to ``Conversation.writeFeatures``.
        out: Destination passed to ``Conversation.writeFeatures``. Defaults
            to the same value as the ``--out`` command-line option, so the
            function no longer depends on a module-level ``args`` global.

    Returns:
        True if features were written, False if the file was skipped
        because there were no annotators or no utterances.
    """
    conversation = Conversation(fname, norm=thisNorm)
    conversation.limitTimeRange(numDays)

    if not (annotators and conversation.utterances):
        return False

    for annotator in annotators:
        conversation.addAnnotator(annotator)

    conversation.groupFeatures()

    conversation.writeFeatures(out, need_header, os.path.basename(fname))
    return True

if __name__ == "__main__":
    # Command-line entry point: declare the options, parse them, run main().
    parser = argparse.ArgumentParser(
        description='Extract features from text data files.',
    )

    # Positional input files (one or more).
    parser.add_argument(
        'textfiles',
        nargs='+',
        metavar='FILE.csv',
        help='a text thread in CSV file that should be analyzed',
    )

    # Options that take a value.
    parser.add_argument(
        '--dict', '-d',
        metavar='FILE.dic',
        help='a dictionary file in .dic format',
    )
    parser.add_argument(
        '--survey', '-s',
        metavar='SURVEY.csv',
        help='survey results file in .csv format',
    )
    parser.add_argument(
        '--out', '-o',
        metavar='FILE',
        default="all_features.csv",
        help='Write results to FILE in .csv format',
    )
    parser.add_argument(
        '--norm', '-n',
        metavar='NORM.dic',
        help='a tab-delimited set of replacements',
    )
    parser.add_argument(
        '--time', '-t',
        metavar='N',
        type=int,
        default=14,
        help='number of days to process',
    )

    # Boolean feature switches, registered in their original order.
    # NOTE: --timehist is accepted here, but main() in this file never reads
    # args.timehist.
    for switch in (
        ('--countwords', '-w'),
        ('--countpos', '-p'),
        ('--responsetimes', '-r'),
        ('--timehist',),
        ('--timeofday',),
        ('--activedays',),
        ('--allfeatures',),
    ):
        parser.add_argument(*switch, action='store_true')

    args = parser.parse_args()

    main(args)