Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion l1/cmd/circuit_breaker/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import (

const addr = "localhost:5301"

func main() {
func main() {
http.HandleFunc("/pay", func(writer http.ResponseWriter, request *http.Request) {
bb, err := ioutil.ReadAll(request.Body)
if err != nil {
Expand Down
16 changes: 13 additions & 3 deletions l1/cmd/crawler/crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ type crawlResult struct {

// crawler recursively scans web pages, recording the pages it has
// already visited and bounding the recursion depth.
type crawler struct {
	// Embedded Mutex guards visited: the map is written from several
	// goroutines (see the lock taken around the map update in run).
	sync.Mutex
	// rwMutex guards maxDepth, which can be increased at runtime via dive().
	// NOTE(review): any reader of maxDepth should hold rwMutex.RLock —
	// confirm run() actually does so when checking depth.
	rwMutex sync.RWMutex
	// visited maps page URL -> page title for already-scanned pages.
	visited map[string]string
	// maxDepth limits how deep run() recurses; grows when dive() is called.
	maxDepth int
}
Expand All @@ -26,11 +27,20 @@ func newCrawler(maxDepth int) *crawler {
}
}

// dive increases the crawler's maximum search depth by 2, letting the
// recursive scan go deeper. It is intended to be triggered from the
// SIGUSR1-watching goroutine, concurrently with run().
func (c *crawler) dive() {
	// maxDepth is mutated here, so an exclusive (write) lock is required.
	// The previous code took RLock, which allows multiple concurrent
	// holders and therefore races with any other reader or writer of
	// maxDepth (go test -race would flag this).
	c.rwMutex.Lock()
	defer c.rwMutex.Unlock()
	c.maxDepth += 2
}

// рекурсивно сканируем страницы
func (c *crawler) run(ctx context.Context, url string, results chan<- crawlResult, depth int) {
// просто для того, чтобы успевать следить за выводом программы, можно убрать :)
time.Sleep(2 * time.Second)

ctxParse, cancel := context.WithTimeout(ctx, 3 * time.Second)
defer cancel()

// проверяем что контекст исполнения актуален
select {
case <-ctx.Done():
Expand All @@ -42,7 +52,7 @@ func (c *crawler) run(ctx context.Context, url string, results chan<- crawlResul
return
}

page, err := parse(url)
page, err := parse(ctxParse, url)
if err != nil {
// ошибку отправляем в канал, а не обрабатываем на месте
results <- crawlResult{
Expand All @@ -51,8 +61,8 @@ func (c *crawler) run(ctx context.Context, url string, results chan<- crawlResul
return
}

title := pageTitle(page)
links := pageLinks(nil, page)
title := pageTitle(ctxParse, page)
links := pageLinks(ctxParse, nil, page)

// блокировка требуется, т.к. мы модифицируем мапку в несколько горутин
c.Lock()
Expand Down
15 changes: 14 additions & 1 deletion l1/cmd/crawler/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ func main() {
// запускаем горутину для чтения из каналов
done := watchCrawler(ctx, results, errorsLimit, resultsLimit)

// запускаем горутину для прослушивания сигнала пользователя
go watchUserSignal(crawler)

// запуск основной логики
// внутри есть рекурсивные запуски анализа в других горутинах
crawler.run(ctx, url, results, 0)
Expand All @@ -68,7 +71,7 @@ func main() {

// ловим сигналы выключения
func watchSignals(cancel context.CancelFunc) {
osSignalChan := make(chan os.Signal)
osSignalChan := make(chan os.Signal, 1)

signal.Notify(osSignalChan,
syscall.SIGINT,
Expand All @@ -81,6 +84,16 @@ func watchSignals(cancel context.CancelFunc) {
cancel()
}

// watchUserSignal blocks until the user sends SIGUSR1, then tells the
// crawler to search deeper. Runs once: after the first signal it returns.
func watchUserSignal(c *crawler) {
	// Buffered with capacity 1 as required by signal.Notify, so a signal
	// delivered while we are not yet receiving is not dropped.
	userSig := make(chan os.Signal, 1)
	signal.Notify(userSig, syscall.SIGUSR1)

	<-userSig
	log.Printf("got user signal to search deep")
	c.dive()
}

func watchCrawler(ctx context.Context, results <-chan crawlResult, maxErrors, maxResults int) chan struct{} {
readersDone := make(chan struct{})

Expand Down
96 changes: 60 additions & 36 deletions l1/cmd/crawler/ugly_parser.go
Original file line number Diff line number Diff line change
@@ -1,60 +1,84 @@
package main

import (
"bytes"
"context"
"fmt"
"golang.org/x/net/html"
"net/http"
"time"
)

// парсим страницу
func parse(url string) (*html.Node, error) {
// что здесь должно быть вместо http.Get? :)
r, err := http.Get(url)
if err != nil {
return nil, fmt.Errorf("can't get page")
}
b, err := html.Parse(r.Body)
if err != nil {
return nil, fmt.Errorf("can't parse page")
func parse(ctx context.Context, url string) (*html.Node, error) {
select {
case <-ctx.Done():
return nil, nil
default:
client := http.Client{Timeout: 1 * time.Second}
request, err := http.NewRequest("GET", url, bytes.NewReader([]byte{}))
if err != nil {
return nil, fmt.Errorf("can't send request")
}

r, err := client.Do(request)
if err != nil {
return nil, fmt.Errorf("can't get page")
}

b, err := html.Parse(r.Body)
if err != nil {
return nil, fmt.Errorf("can't parse page")
}
return b, err
}
return b, err
}

// ищем заголовок на странице
func pageTitle(n *html.Node) string {
var title string
if n.Type == html.ElementNode && n.Data == "title" {
return n.FirstChild.Data
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
title = pageTitle(c)
if title != "" {
break
func pageTitle(ctx context.Context, n *html.Node) string {
select {
case <-ctx.Done():
return ""
default:
var title string
if n.Type == html.ElementNode && n.Data == "title" {
return n.FirstChild.Data
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
title = pageTitle(ctx, c)
if title != "" {
break
}
}
return title
}
return title
}

// ищем все ссылки на страницы. Используем мапку чтобы избежать дубликатов
func pageLinks(links map[string]struct{}, n *html.Node) map[string]struct{} {
if links == nil {
links = make(map[string]struct{})
}
func pageLinks(ctx context.Context, links map[string]struct{}, n *html.Node) map[string]struct{} {
select {
case <-ctx.Done():
return nil
default:
if links == nil {
links = make(map[string]struct{})
}

if n.Type == html.ElementNode && n.Data == "a" {
for _, a := range n.Attr {
if a.Key != "href" {
continue
}
if n.Type == html.ElementNode && n.Data == "a" {
for _, a := range n.Attr {
if a.Key != "href" {
continue
}

// костылик для простоты
if _, ok := links[a.Val]; !ok && len(a.Val) > 2 && a.Val[:2] == "//" {
links["http://"+a.Val[2:]] = struct{}{}
// костылик для простоты
if _, ok := links[a.Val]; !ok && len(a.Val) > 2 && a.Val[:2] == "//" {
links["http://"+a.Val[2:]] = struct{}{}
}
}
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
links = pageLinks(ctx, links, c)
}
return links
}
for c := n.FirstChild; c != nil; c = c.NextSibling {
links = pageLinks(links, c)
}
return links
}
4 changes: 2 additions & 2 deletions l1/cmd/signals/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ import (
func main() {
log.Print("start")

sigintChan := make(chan os.Signal)
sigintChan := make(chan os.Signal, 1)
signal.Notify(sigintChan, syscall.SIGINT)

sigtermChan := make(chan os.Signal)
sigtermChan := make(chan os.Signal, 1)
signal.Notify(sigtermChan, syscall.SIGTERM)

select {
Expand Down