-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgetHuntleys.sh
More file actions
executable file
·128 lines (114 loc) · 3.57 KB
/
getHuntleys.sh
File metadata and controls
executable file
·128 lines (114 loc) · 3.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#!/usr/bin/env bash
#
# Usage: getHuntleys.sh [output dir] [YEAR] [email] [password] [accountname]
#
#   output dir   directory that receives the per-year download folder and a
#                tmp/ working folder
#   YEAR         archive year to fetch
#   email        login user name posted to the site
#   password     login password posted to the site
#   accountname  value compared with the user name parsed from the site after
#                login
#
# All five arguments are required and must be non-empty.

if [[ -z "$1" || -z "$2" || -z "$3" || -z "$4" || -z "$5" ]]; then
  # Quoted so the bracketed placeholders are printed literally; unquoted,
  # words such as [YEAR] are glob patterns and would be replaced by any
  # matching single-character file name in the current directory.
  echo 'Usage: getHuntleys.sh [output dir] [YEAR] [email] [password] [accountname]' >&2
  # 255 is the status bash reports for "exit -1"; spelled out explicitly.
  exit 255
fi

OPDIR="$1"
YEAR="$2"
EMAIL="$3"
PASSWORD="$4"
NAME="$5"
# NOTE(review): TMPDIR is also the conventional environment variable naming
# the system temp directory; assigning it here changes it for this script.
TMPDIR="${OPDIR}/tmp"
# Base URL that is prepended to the site-relative document links.
SITE="http://www.morningstar.com.au"

# Working directory for the cookie jar and the parsed archive listing.
# Nothing below can work without it, so stop if it cannot be created.
if ! mkdir -p -- "$TMPDIR"; then
  echo "Can not create working directory: $TMPDIR" >&2
  exit 1
fi

# Cookie jar shared by every wget call; recreated empty on each run so a
# previous session is never reused.
COOKIES="$TMPDIR/cookies.txt"
rm -f -- "$COOKIES"
touch -- "$COOKIES"

# File that receives the parsed archive listing.
FINAL="$TMPDIR/final.txt"
#######################################
# Fetch the site home page using the shared cookie jar and dump the response
# body to ~/ms/login.txt.
# Globals:
#   COOKIES (read)    - cookie jar path handed to wget for load and save
#   WELCOME (written) - response body returned by wget
# Arguments:
#   None
#######################################
doLogin() {
  WELCOME=$(wget -qO- \
    --server-response \
    --load-cookies "$COOKIES" \
    --keep-session-cookies \
    --save-cookies "$COOKIES" \
    'https://www.morningstar.com.au')
  # printf with a quoted argument writes the body verbatim; an unquoted echo
  # would collapse whitespace and expand any glob characters in the page.
  printf '%s\n' "$WELCOME" > ~/ms/login.txt
}
#######################################
# Percent-encode a string for an application/x-www-form-urlencoded body.
# Arguments:
#   $1 - raw value
# Outputs:
#   Encoded value on stdout, without a trailing newline.
#######################################
_urlencode() {
  local LC_ALL=C # keep the [a-z]/[A-Z]/[0-9] ranges below ASCII-only
  local raw=$1 encoded='' ch hex i
  for ((i = 0; i < ${#raw}; i++)); do
    ch=${raw:i:1}
    case "$ch" in
      [a-zA-Z0-9.~_-])
        encoded+=$ch
        ;;
      *)
        # od yields the exact byte values; emit one %XX per byte.
        hex=$(printf '%s' "$ch" | od -An -v -tx1 | tr -d ' \n')
        while [[ -n "$hex" ]]; do
          ch=${hex:0:2}
          encoded+="%${ch^^}"
          hex=${hex:2}
        done
        ;;
    esac
  done
  printf '%s' "$encoded"
}

#######################################
# Post the login form, then fetch the header frame and derive the logged-in
# user name from it.
# Globals:
#   EMAIL, PASSWORD (read) - credentials sent as UserName / Password
#   COOKIES (read)         - cookie jar path handed to wget
#   WELCOME (written)      - body of the header-frame response
#   SITE_USER (written)    - second whitespace-separated word of WELCOME
# Outputs:
#   "Login posted. Checking..." on stdout; the response body is also dumped
#   to ~/ms/login.txt.
#######################################
doLogin1() {
  local form
  # Encode the credentials so characters such as '&', '=', '+' or a space
  # cannot split or corrupt the form fields.
  form="UserName=$(_urlencode "$EMAIL")&Password=$(_urlencode "$PASSWORD")&LoginSubmit=Login"

  # NOTE(review): the form data is passed on wget's command line, where other
  # local users may be able to see it in the process list.
  # NOTE(review): --referer replaces a bare-URL --header value, which wget
  # would send as a header named "https"; presumably a Referer was intended —
  # confirm against the site.
  wget -qO- \
    --server-response \
    --keep-session-cookies \
    --save-cookies "$COOKIES" \
    --header='Host: www.morningstar.com.au' \
    --referer='https://www.morningstar.com.au/Security/Login' \
    --post-data "$form" \
    'https://www.morningstar.com.au/Security/Login' > /dev/null
  echo Login posted. Checking...

  WELCOME=$(wget -qO- \
    --server-response \
    --load-cookies "$COOKIES" \
    --keep-session-cookies \
    --save-cookies "$COOKIES" \
    'https://www.morningstar.com.au/Common/IframeHeader?ControllerToInject=About&ReqPath=About.mvc/Fsg&introad')
  # Disabled filter that would narrow the page down to the welcome text:
  #   | xmllint --html --recover - \
  #   | xmllint --html -xpath "//span [@class='welcome']/text()" -

  printf '%s\n' "$WELCOME" > ~/ms/login.txt

  # Second whitespace-separated word of the whole response, counted across
  # lines, taken without letting the shell glob-expand the page content.
  SITE_USER=$(printf '%s\n' "$WELCOME" \
    | awk '{ for (i = 1; i <= NF; i++) if (++n == 2) { print $i; exit } }')
}
#######################################
# Download the archive page for YEAR and reduce it to one "<id>:<link>" line
# per entry, written to FINAL.
# Globals:
#   COOKIES (read) - cookie jar path handed to wget
#   YEAR (read)    - appended to the archive URL
#   FINAL (read)   - path of the output listing (overwritten)
# Arguments:
#   None
#######################################
doGetFiles() {
  # Pipeline stages:
  #   1. wget     - fetch the archive page to stdout.
  #   2. xmllint  - re-serialise the HTML in recovery mode.
  #   3. xmllint  - select the text of column 2 and the link target in
  #                 column 5 of every row of table "ProfileTable".
  #   4. sed      - per line: drop the last character, delete empty lines,
  #                 strip leading spaces, remove "YMW ", and peel the
  #                 href="..." wrapper off the link.
  #   5. sed      - join everything with ':' and start a new line before each
  #                 two-character field, giving "<id>:<link>" records.
  wget -O- \
    --server-response \
    --load-cookies "$COOKIES" \
    --keep-session-cookies \
    --save-cookies "$COOKIES" \
    "https://www.morningstar.com.au/Stocks/YMW/Archive/${YEAR}" \
    | xmllint --html --recover - \
    | xmllint --html --xpath "//table [@id='ProfileTable']/tbody/tr/td[2]/text()|//table [@id='ProfileTable']/tbody/tr/td[5]/a/@href" - \
    | sed -e 's/.$//' -e '/^$/d' -e 's/^ *//' -e 's/YMW //' -e 's/^href="//' -e 's/"$//' \
    | sed -e ':a' -e 'N' -e '$!ba' -e 's/\n/:/g' -e 's/:\(..\):/\n\1:/g' > "$FINAL"
}
#######################################
# Log in and confirm that the site reports the expected account name,
# retrying once before giving up.
# Globals:
#   SITE_USER (read) - user name produced by doLogin1
#   NAME (read)      - expected account name
# Returns:
#   0 when SITE_USER matches NAME; otherwise the script exits with status 1.
#######################################
doLoginValidate() {
  local attempt
  for attempt in 1 2; do
    # doLogin1 is the login routine that posts the credentials and sets
    # SITE_USER; doLogin only fetches the home page, so validating after it
    # could never succeed.
    doLogin1
    if [[ "${SITE_USER}" == "${NAME}" ]]; then
      return 0
    fi
  done
  echo Could not login. >&2
  # Explicit non-zero status: a bare "exit" would report the status of the
  # echo above, i.e. success.
  exit 1
}
# Log in (the script exits inside doLoginValidate when the login cannot be
# confirmed), then build the listing of documents for the requested year.
doLoginValidate
echo Logged in ok.
doGetFiles &> /dev/null

# Per-year download directory; -p makes this a no-op when it already exists.
mkdir -p -- "$OPDIR/${YEAR}"
#######################################
# Download one listed document unless it is already on disk, then delete the
# result if it is too small to be a real document.
# Globals:
#   OPDIR, YEAR (read) - the file is stored as $OPDIR/$YEAR/<YEAR>YMW<id>.pdf
#   SITE (read)        - prefix put in front of the link
#   COOKIES (read)     - cookie jar path handed to wget
# Arguments:
#   $1 - listing record of the form "<id>:<link>"
#######################################
doFetchIssue() {
  local entry=$1
  local wknum=${entry%%:*}
  local doc=''
  # Everything after the first ':' is the link, so links that themselves
  # contain ':' are kept whole.
  if [[ "$entry" == *:* ]]; then
    doc=${entry#*:}
  fi
  local destDir="$OPDIR/${YEAR}"
  local filename="${YEAR}YMW${wknum}.pdf"
  local target="$destDir/$filename"
  local -i pdfSize

  if [[ -w "$destDir" ]]; then
    if [[ ! -e "$target" ]]; then
      echo get the file "$target"
      echo "$target"
      echo "$entry"
      wget -O "$target" \
        --load-cookies "${COOKIES}" \
        --keep-session-cookies \
        --save-cookies "${COOKIES}" \
        "${SITE}${doc}"
    fi
  else
    echo Can not write to "${YEAR}" dir.
  fi

  # Remove empty or near-empty downloads. The check uses the same path the
  # file was written to, so it works from any current directory, and wc -c
  # gives the size in bytes (du reports allocated disk blocks instead).
  if [[ ! -e "$target" ]]; then
    echo "$target" does not exist.
  else
    pdfSize=$(wc -c < "$target")
    if ((pdfSize < 20)); then
      rm -- "$target"
    fi
  fi
}

# Read the listing line by line on a separate descriptor so commands in the
# loop body cannot consume it; blank lines are skipped.
while IFS= read -r LINE <&3 || [[ -n "$LINE" ]]; do
  [[ -n "$LINE" ]] || continue
  doFetchIssue "$LINE"
done 3< "$FINAL"
# End the session on the site; the response body is discarded.
echo Do Logout..
wget -O- \
  --server-response \
  --load-cookies "$COOKIES" \
  --keep-session-cookies \
  --save-cookies "$COOKIES" \
  'https://www.morningstar.com.au/Security/LogOut' > /dev/null