forked from advancedlogic/GoOse
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathgoose.go
More file actions
34 lines (29 loc) · 894 Bytes
/
goose.go
File metadata and controls
34 lines (29 loc) · 894 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
package goose
import (
"github.com/pkg/errors"
)
// Goose is the main entry point of the program. It carries the
// Configuration that its extraction methods pass on to the HTML requester
// and to the crawler.
type Goose struct {
	// config is handed to NewHtmlRequester and NewCrawler by the Extract* methods.
	config Configuration
}
// New returns a new instance of the article extractor. The variadic args
// are forwarded unchanged to GetDefaultConfiguration, whose result becomes
// the extractor's configuration.
func New(args ...string) Goose {
	var g Goose
	g.config = GetDefaultConfiguration(args...)
	return g
}
// ExtractFromURL follows the URL, fetches the HTML page and returns an
// article object.
//
// The page is fetched with an HTML requester built from the receiver's
// configuration; the fetched HTML and the original url are then handed to
// a crawler built from the same configuration.
//
// If fetching fails, the returned article is nil and the error is the
// fetch error wrapped with additional context. Otherwise the result of
// the crawl is returned as-is.
func (g Goose) ExtractFromURL(url string) (*Article, error) {
	// Lower-case local name: an exported-style identifier here reads like a
	// package-level name and mirrors the constructor it is built from.
	requester := NewHtmlRequester(g.config)

	html, err := requester.fetchHTML(url)
	if err != nil {
		return nil, errors.Wrap(err, "could not get html from site")
	}

	return NewCrawler(g.config).Crawl(html, url)
}
// ExtractFromRawHTML returns an article object from the raw HTML content.
// No fetching takes place: RawHTML and url are passed straight to a
// crawler built from the receiver's configuration, and the crawler's
// result is returned unchanged.
func (g Goose) ExtractFromRawHTML(RawHTML string, url string) (*Article, error) {
	return NewCrawler(g.config).Crawl(RawHTML, url)
}