Skip to content

Commit 63e01a8

Browse files
authored
Merge pull request #3 from dranih/dev
Dev
2 parents 6b1a931 + 2166bbe commit 63e01a8

412 files changed

Lines changed: 85400 additions & 15688 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

cmd/gowap/main.go

Lines changed: 26 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,34 @@
11
package main
22

33
import (
4+
"bytes"
45
"encoding/json"
56
"flag"
67
"fmt"
78
"os"
89

910
gowap "github.com/dranih/gowap/pkg/core"
10-
log "github.com/sirupsen/logrus"
1111
)
1212

1313
func main() {
1414

15-
var url, appsJSONPath, scraper string
16-
var help, rawOutput bool
15+
var url, appsJSONPath, scraper, userAgent string
16+
var help, pretty bool
1717
var timeoutSeconds, loadingTimeoutSeconds, maxDepth, maxVisitedLinks, msDelayBetweenRequests int
1818
flag.StringVar(&appsJSONPath, "file", "", "Path to override default technologies.json file")
1919
flag.StringVar(&scraper, "scraper", "rod", "Choose scraper between rod (default) and colly")
20+
flag.StringVar(&userAgent, "useragent", "", "Override the user-agent string")
2021
flag.IntVar(&timeoutSeconds, "timeout", 3, "Timeout in seconds for fetching the url")
2122
flag.IntVar(&loadingTimeoutSeconds, "loadtimeout", 3, "Timeout in seconds for loading the page")
2223
flag.IntVar(&maxDepth, "depth", 0, "Don't analyze page when depth superior to this number. Default (0) means no recursivity (only first page will be analyzed)")
2324
flag.IntVar(&maxVisitedLinks, "maxlinks", 5, "Max number of pages to visit. Exit when reached")
2425
flag.IntVar(&msDelayBetweenRequests, "delay", 100, "Delay in ms between requests")
25-
flag.BoolVar(&rawOutput, "raw", false, "Raw output (JSON by default)")
26+
flag.BoolVar(&pretty, "pretty", false, "Pretty print json output")
2627
flag.BoolVar(&help, "h", false, "Help")
2728
flag.Parse()
2829

2930
var Usage = func() {
30-
fmt.Println("Usage : gowap [options] <url>")
31+
fmt.Fprintln(os.Stderr, "Usage : gowap [options] <url>")
3132
flag.PrintDefaults()
3233
}
3334

@@ -36,43 +37,53 @@ func main() {
3637
os.Exit(1)
3738
}
3839
if flag.NArg() == 0 {
39-
fmt.Println("You must specify a url to analyse")
40+
fmt.Fprintln(os.Stderr, "You must specify a url to analyse")
4041
Usage()
4142
os.Exit(1)
4243
} else if flag.NArg() > 1 {
43-
fmt.Printf("Too many arguments %s", flag.Args())
44+
fmt.Fprintf(os.Stderr, "Too many arguments %s", flag.Args())
4445
Usage()
4546
os.Exit(1)
4647
} else {
4748
url = flag.Arg(0)
4849
}
4950
if scraper != "rod" && scraper != "colly" {
50-
fmt.Printf("Unknown scraper %s : only supporting rod and colly", scraper)
51+
fmt.Fprintf(os.Stderr, "Unknown scraper %s : only supporting rod and colly", scraper)
5152
Usage()
5253
os.Exit(1)
5354
}
5455

5556
config := gowap.NewConfig()
5657
config.AppsJSONPath = appsJSONPath
57-
config.JSON = !rawOutput
5858
config.TimeoutSeconds = timeoutSeconds
5959
config.LoadingTimeoutSeconds = loadingTimeoutSeconds
6060
config.MaxDepth = maxDepth
6161
config.MaxVisitedLinks = maxVisitedLinks
6262
config.MsDelayBetweenRequests = msDelayBetweenRequests
6363
config.Scraper = scraper
64+
if userAgent != "" {
65+
config.UserAgent = userAgent
66+
}
6467

6568
wapp, err := gowap.Init(config)
6669
if err != nil {
67-
log.Errorln(err)
70+
fmt.Fprintln(os.Stderr, err)
71+
os.Exit(1)
6872
}
6973
res, err := wapp.Analyze(url)
7074
if err != nil {
71-
log.Errorln(err)
75+
fmt.Fprintln(os.Stderr, err)
76+
os.Exit(1)
7277
}
73-
prettyJSON, err := json.MarshalIndent(res, "", " ")
74-
if err != nil {
75-
log.Errorln(err)
78+
if pretty {
79+
var prettyJSON bytes.Buffer
80+
err = json.Indent(&prettyJSON, []byte(res.(string)), "", " ")
81+
if err != nil {
82+
fmt.Fprintln(os.Stderr, err)
83+
}
84+
fmt.Println(&prettyJSON)
85+
} else {
86+
fmt.Println(res)
87+
7688
}
77-
log.Infof("[*] Result for %s:\n%s", url, string(prettyJSON))
7889
}

go.mod

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,25 @@ go 1.16
44

55
require (
66
github.com/PuerkitoBio/goquery v1.6.1
7+
github.com/andybalholm/cascadia v1.2.0 // indirect
78
github.com/antchfx/htmlquery v1.2.3 // indirect
89
github.com/antchfx/xmlquery v1.3.5 // indirect
9-
github.com/go-rod/rod v0.94.1
10+
github.com/antchfx/xpath v1.1.11 // indirect
11+
github.com/go-rod/rod v0.96.1
1012
github.com/gobwas/glob v0.2.3 // indirect
1113
github.com/gocolly/colly v1.2.0
14+
github.com/golang/protobuf v1.5.1 // indirect
1215
github.com/json-iterator/go v1.1.10
1316
github.com/kennygrant/sanitize v1.2.4 // indirect
17+
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
18+
github.com/modern-go/reflect2 v1.0.1 // indirect
1419
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect
15-
github.com/sirupsen/logrus v1.4.2
16-
github.com/stretchr/testify v1.3.0
20+
github.com/sirupsen/logrus v1.8.1
21+
github.com/stretchr/testify v1.7.0
1722
github.com/temoto/robotstxt v1.1.1 // indirect
18-
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110 // indirect
19-
golang.org/x/sys v0.0.0-20210220050731-9a76102bfb43 // indirect
23+
go.zoe.im/surferua v0.0.3
24+
golang.org/x/net v0.0.0-20210324205630-d1beb07c2056 // indirect
25+
golang.org/x/text v0.3.5 // indirect
2026
google.golang.org/appengine v1.6.7 // indirect
27+
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect
2128
)

go.sum

Lines changed: 41 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,55 @@
11
github.com/PuerkitoBio/goquery v1.6.1 h1:FgjbQZKl5HTmcn4sKBgvx8vv63nhyhIpv7lJpFGCWpk=
22
github.com/PuerkitoBio/goquery v1.6.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
3-
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
43
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
4+
github.com/andybalholm/cascadia v1.2.0 h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5h18aE=
5+
github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY=
56
github.com/antchfx/htmlquery v1.2.3 h1:sP3NFDneHx2stfNXCKbhHFo8XgNjCACnU/4AO5gWz6M=
67
github.com/antchfx/htmlquery v1.2.3/go.mod h1:B0ABL+F5irhhMWg54ymEZinzMSi0Kt3I2if0BLYa3V0=
78
github.com/antchfx/xmlquery v1.3.5 h1:I7TuBRqsnfFuL11ruavGm911Awx9IqSdiU6W/ztSmVw=
89
github.com/antchfx/xmlquery v1.3.5/go.mod h1:64w0Xesg2sTaawIdNqMB+7qaW/bSqkQm+ssPaCMWNnc=
910
github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
10-
github.com/antchfx/xpath v1.1.10 h1:cJ0pOvEdN/WvYXxvRrzQH9x5QWKpzHacYO8qzCcDYAg=
1111
github.com/antchfx/xpath v1.1.10/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
12+
github.com/antchfx/xpath v1.1.11 h1:WOFtK8TVAjLm3lbgqeP0arlHpvCEeTANeWZ/csPpJkQ=
13+
github.com/antchfx/xpath v1.1.11/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
1214
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
1315
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
1416
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
15-
github.com/go-rod/rod v0.94.1 h1:sAvY47PXDmkfZlotFbIRip257XDzgpLBkHuct65U4Ps=
16-
github.com/go-rod/rod v0.94.1/go.mod h1:/W4lcZiCALPD603MnJGIvhtywP3R6yRB9EDfFfsHiiI=
17+
github.com/go-rod/rod v0.96.1 h1:hvpz+AszyCt6YN1PDjfT4QhDXmcQwUusNt6pAo5xqAA=
18+
github.com/go-rod/rod v0.96.1/go.mod h1:cIR84WFrMAwShOkmIJcuJZDyKLjQq3yiZSMEhawHktc=
1719
github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
1820
github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
1921
github.com/gocolly/colly v1.2.0 h1:qRz9YAn8FIH0qzgNUw+HT9UN7wm1oF9OBAilwEWpyrI=
2022
github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA=
2123
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY=
2224
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
23-
github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg=
2425
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
26+
github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk=
27+
github.com/golang/protobuf v1.5.1 h1:jAbXjIeW2ZSW2AwFxlGTDoc2CjI2XujLkV3ArsZFCvc=
28+
github.com/golang/protobuf v1.5.1/go.mod h1:DopwsBzvsk0Fs44TXzsVbJyPhcCPeIwnvohx4u74HPM=
29+
github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU=
30+
github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE=
2531
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
2632
github.com/json-iterator/go v1.1.10 h1:Kz6Cvnvv2wGdaG/V8yMvfkmNiXq9Ya2KUv4rouJJr68=
2733
github.com/json-iterator/go v1.1.10/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
2834
github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o=
2935
github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak=
30-
github.com/konsorten/go-windows-terminal-sequences v1.0.1 h1:mweAR1A6xJ3oS2pRaGiHgQ4OO8tzTaLawm8vnODuwDk=
31-
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
32-
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc=
3336
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
34-
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742 h1:Esafd1046DLDQ0W1YjYsBW+p8U2u7vzgW2SQVmlNazg=
37+
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
38+
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
3539
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
40+
github.com/modern-go/reflect2 v1.0.1 h1:9f412s+6RmYXLWZSEzVVgPGK7C2PphHj5RJrvfx9AWI=
41+
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
3642
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
3743
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
3844
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI=
3945
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
40-
github.com/sirupsen/logrus v1.4.2 h1:SPIRibHv4MatM3XXNO2BJeFLZwZ2LvZgfQ5+UNI2im4=
41-
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
46+
github.com/sirupsen/logrus v1.8.1 h1:dJKuHgqk1NNQlqoA6BTlM1Wf9DOH3NBjQyu0h9+AZZE=
47+
github.com/sirupsen/logrus v1.8.1/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0=
4248
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
43-
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
4449
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
45-
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
4650
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
51+
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
52+
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
4753
github.com/temoto/robotstxt v1.1.1 h1:Gh8RCs8ouX3hRSxxK7B1mO5RFByQ4CmJZDwgom++JaA=
4854
github.com/temoto/robotstxt v1.1.1/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo=
4955
github.com/ysmood/goob v0.3.0 h1:XZ51cZJ4W3WCoCiUktixzMIQF86W7G5VFL4QQ/Q2uS0=
@@ -54,8 +60,10 @@ github.com/ysmood/gotrace v0.2.0 h1:IkTC6rJREwXSaG8yWK+NFwIJGIsxA1DjC6/gxYyQttE=
5460
github.com/ysmood/gotrace v0.2.0/go.mod h1:TzhIG7nHDry5//eYZDYcTzuJLYQIkykJzCRIo4/dzQM=
5561
github.com/ysmood/gson v0.6.3 h1:4cU+5oOdsyundXHy00t99H0rLXLthuseD3x6W+xmCiU=
5662
github.com/ysmood/gson v0.6.3/go.mod h1:3Kzs5zDl21g5F/BlLTNcuAGAYLKt2lV5G8D1zF3RNmg=
57-
github.com/ysmood/leakless v0.6.12 h1:XxtRYl97bJklfv4BZVdyGnd/y42p6w8lu1hUzfCkT4M=
58-
github.com/ysmood/leakless v0.6.12/go.mod h1:R8iAXPRaG97QJwqxs74RdwzcRHT1SWCGTNqY8q0JvMQ=
63+
github.com/ysmood/leakless v0.6.14 h1:+RKbekKP9+WzVSylTlhwp2x9+zSojKOiGUYewgZyO8g=
64+
github.com/ysmood/leakless v0.6.14/go.mod h1:R8iAXPRaG97QJwqxs74RdwzcRHT1SWCGTNqY8q0JvMQ=
65+
go.zoe.im/surferua v0.0.3 h1:1hYCP7Y8C+o0vVU0nRthyjayirnTzRlhTMXAu7uC/+4=
66+
go.zoe.im/surferua v0.0.3/go.mod h1:vJbtp13ZMCGK/wl6qCVRi8WtcG1ySzS9u9lfKUnW2Hw=
5967
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
6068
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
6169
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
@@ -64,20 +72,32 @@ golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR
6472
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
6573
golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
6674
golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
67-
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110 h1:qWPm9rbaAMKs8Bq/9LRpbMqxWRVUAQwMI9fVrssnTfw=
68-
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
75+
golang.org/x/net v0.0.0-20210324205630-d1beb07c2056 h1:sANdAef76Ioam9aQUUdcAqricwY/WUaMc4+7LY4eGg8=
76+
golang.org/x/net v0.0.0-20210324205630-d1beb07c2056/go.mod h1:uSPa2vr4CLtc/ILN5odXGNXS6mhrKVzTaCXzk9m6W3k=
6977
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
7078
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
71-
golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
79+
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
7280
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
7381
golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
74-
golang.org/x/sys v0.0.0-20210220050731-9a76102bfb43 h1:SgQ6LNaYJU0JIuEHv9+s6EbhSCwYeAf5Yvj6lpYlqAE=
75-
golang.org/x/sys v0.0.0-20210220050731-9a76102bfb43/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
82+
golang.org/x/sys v0.0.0-20210324051608-47abb6519492 h1:Paq34FxTluEPvVyayQqMPgHm+vTOrIifmcYxFBx9TLg=
83+
golang.org/x/sys v0.0.0-20210324051608-47abb6519492/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
7684
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
7785
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
7886
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
79-
golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k=
8087
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
88+
golang.org/x/text v0.3.5 h1:i6eZZ+zk0SOf0xgBpEpPD18qWcJda6q1sxt3S0kzyUQ=
89+
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
8190
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
91+
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4=
92+
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
8293
google.golang.org/appengine v1.6.7 h1:FZR1q0exgwxzPzp/aF+VccGrSfxfPpkBqjIIEq3ru6c=
8394
google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
95+
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
96+
google.golang.org/protobuf v1.26.0 h1:bxAC2xTBsZGibn2RTntX0oH50xLsqy1OxA9tTL3p/lk=
97+
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
98+
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
99+
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
100+
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
101+
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
102+
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b h1:h8qDotaEPuJATrMmW04NCwg7v22aHH28wwpauUhK9Oo=
103+
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

pkg/core/core.go

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
log "github.com/sirupsen/logrus"
1818

1919
jsoniter "github.com/json-iterator/go"
20+
"go.zoe.im/surferua"
2021
)
2122

2223
var json = jsoniter.ConfigCompatibleWithStandardLibrary
@@ -37,6 +38,7 @@ type Config struct {
3738
visitedLinks int
3839
MaxVisitedLinks int
3940
MsDelayBetweenRequests int
41+
UserAgent string
4042
}
4143

4244
// NewConfig struct with default values
@@ -51,6 +53,7 @@ func NewConfig() *Config {
5153
visitedLinks: 0,
5254
MaxVisitedLinks: 10,
5355
MsDelayBetweenRequests: 100,
56+
UserAgent: surferua.New().Desktop().Chrome().String(),
5457
}
5558
}
5659

@@ -98,10 +101,18 @@ func Init(config *Config) (wapp *Wappalyzer, err error) {
98101
// Scraper initialization
99102
switch config.Scraper {
100103
case "colly":
101-
wapp.Scraper = &scraper.CollyScraper{TimeoutSeconds: config.TimeoutSeconds, LoadingTimeoutSeconds: config.LoadingTimeoutSeconds}
104+
wapp.Scraper = &scraper.CollyScraper{
105+
TimeoutSeconds: config.TimeoutSeconds,
106+
LoadingTimeoutSeconds: config.LoadingTimeoutSeconds,
107+
UserAgent: config.UserAgent,
108+
}
102109
err = wapp.Scraper.Init()
103110
case "rod":
104-
wapp.Scraper = &scraper.RodScraper{TimeoutSeconds: config.TimeoutSeconds, LoadingTimeoutSeconds: config.LoadingTimeoutSeconds}
111+
wapp.Scraper = &scraper.RodScraper{
112+
TimeoutSeconds: config.TimeoutSeconds,
113+
LoadingTimeoutSeconds: config.LoadingTimeoutSeconds,
114+
UserAgent: config.UserAgent,
115+
}
105116
err = wapp.Scraper.Init()
106117
default:
107118
log.Errorf("Unknown scraper %s", config.Scraper)
@@ -292,6 +303,11 @@ func analyzePage(paramURL string, wapp *Wappalyzer, detectedApplications *detect
292303
if err == nil {
293304
links = getLinksSlice(doc, paramURL)
294305
}
306+
//Follow redirects
307+
if scraped.URLs.URL != paramURL {
308+
(*links)[strings.TrimRight(scraped.URLs.URL, "/")] = struct{}{}
309+
scraped.URLs.URL = paramURL
310+
}
295311

296312
for _, app := range wapp.Apps {
297313
wg.Add(1)

pkg/core/core_test.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,6 @@ func TestRecursivity(t *testing.T) {
343343
wapp, err := Init(config)
344344
if assert.NoError(t, err, "GoWap Init error") {
345345
res, err := wapp.Analyze(url)
346-
log.Printf("res : %v", res)
347346
if assert.NoError(t, err, "GoWap Analyze error") {
348347
var output output
349348
err = json.UnmarshalFromString(res.(string), &output)

pkg/scraper/scraper_colly.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ type CollyScraper struct {
2323
Transport *http.Transport
2424
TimeoutSeconds int
2525
LoadingTimeoutSeconds int
26+
UserAgent string
2627
}
2728

2829
func (s *CollyScraper) Init() error {
@@ -38,13 +39,13 @@ func (s *CollyScraper) Init() error {
3839
TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
3940
}
4041

41-
s.Collector = colly.NewCollector(
42-
colly.IgnoreRobotsTxt(),
43-
)
42+
s.Collector = colly.NewCollector()
43+
s.Collector.IgnoreRobotsTxt = false
44+
s.Collector.UserAgent = s.UserAgent
4445
s.Collector.WithTransport(s.Transport)
4546

4647
extensions.Referer(s.Collector)
47-
extensions.RandomUserAgent(s.Collector)
48+
//extensions.RandomUserAgent(s.Collector)
4849

4950
return nil
5051
}

0 commit comments

Comments
 (0)