-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathExtractTermFeaturesScores.sh
More file actions
41 lines (38 loc) · 1.22 KB
/
ExtractTermFeaturesScores.sh
File metadata and controls
41 lines (38 loc) · 1.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# Extract word feature counts from the input text corpus by running the
# lefex ExtractTermFeatureScores Hadoop job.
#
# Usage:   ExtractTermFeaturesScores.sh <corpus-directory> <output-directory>
# Args:    $1 - corpus path (checked for existence with `hadoop fs -test -e`)
#          $2 - output path handed to the job
# Needs:   bash (arrays, `read -n`), `hadoop` on PATH, and the jars built by
#          `mvn package` under target/ (run from the project root).
# Returns: 1 on usage error or when no jars are found; otherwise the exit
#          status of the hadoop job.

if [[ -z "${1:-}" || -z "${2:-}" ]]; then
  echo "Extract word feature counts from the input text corpus" >&2
  echo "parameters: <corpus-directory> <output-directory>" >&2
  # A usage error must not look like success to calling scripts.
  exit 1
fi

corpus=$1
output=$2

hadoop_xmx_mb=8192
hadoop_mb=8000
queue=shortrunning
# NOTE(review): the JVM heap (-Xmx, 8192 MB) is larger than the task memory
# setting (8000 MB) -- confirm this is intended.

# Binaries are in the default location (mvn package).
jar_files=()
for jar in target/lib/*.jar target/lefex*.jar; do
  # An unmatched glob stays as the literal pattern; keep only real files.
  if [[ -e "$jar" ]]; then
    jar_files+=("$jar")
  fi
done
if (( ${#jar_files[@]} == 0 )); then
  echo "No jars found under target/ (run 'mvn package' first)" >&2
  exit 1
fi

# -libjars expects a comma-separated list, the classpath a colon-separated one.
jars=$(IFS=,; printf '%s' "${jar_files[*]}")
path=$(IFS=:; printf '%s' "${jar_files[*]}")

echo "Corpus: $corpus"
if hadoop fs -test -e "$corpus"; then
  echo "Corpus exists: true"
else
  echo "Corpus exists: false"
fi
echo "Output: $output"

echo "To start press any key, to stop press Ctrl+C"
# Read a single keystroke so that "any key" really means one key.
read -r -n 1

# Every continuation backslash is preceded by a space so that adjacent
# options are never glued together into one word.
# NOTE(review): the meaning of the trailing `false` argument is defined by
# HadoopMain and is not visible here -- confirm against the job's sources.
HADOOP_CLASSPATH=$path hadoop \
  de.uhh.lt.lefex.ExtractTermFeatureScores.HadoopMain \
  -libjars "$jars" \
  -Dmapreduce.reduce.failures.maxpercent=10 \
  -Dmapreduce.map.failures.maxpercent=10 \
  -Dmapreduce.job.queuename="$queue" \
  -Dmapreduce.map.java.opts="-Xmx${hadoop_xmx_mb}m" \
  -Dmapreduce.map.memory.mb="$hadoop_mb" \
  -Dmapreduce.reduce.java.opts="-Xmx${hadoop_xmx_mb}m" \
  -Dmapreduce.reduce.memory.mb="$hadoop_mb" \
  -Dmapred.max.split.size=2000000 \
  -Dholing.mwe.vocabulary=src/main/resources/data/voc-ner.csv \
  -Dholing.mwe.ner=true \
  "$corpus" \
  "$output" \
  false