Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 16 additions & 7 deletions l1/cmd/crawler/crawler.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@ type crawlResult struct {
}

// crawler recursively scans pages, recording visited URLs and their titles.
// It is used from multiple goroutines, so its mutable state is lock-guarded.
type crawler struct {
	// mutexVisited guards visited: the map is written from every crawl goroutine.
	mutexVisited sync.Mutex
	// mutexMaxDepth guards maxDepth, which can be raised at runtime via SIGUSR1
	// (see increaseMaxDepth) while run goroutines read it.
	mutexMaxDepth sync.RWMutex
	visited       map[string]string // url -> page title
	maxDepth      int               // crawl depth limit
}

func newCrawler(maxDepth int) *crawler {
Expand All @@ -26,6 +27,12 @@ func newCrawler(maxDepth int) *crawler {
}
}

// increaseMaxDepth raises the crawl depth limit by maxDepth.
// Safe for concurrent use: the limit is updated under the write lock.
func (c *crawler) increaseMaxDepth(maxDepth int) {
	c.mutexMaxDepth.Lock()
	defer c.mutexMaxDepth.Unlock()
	c.maxDepth += maxDepth
}

// рекурсивно сканируем страницы
func (c *crawler) run(ctx context.Context, url string, results chan<- crawlResult, depth int) {
// просто для того, чтобы успевать следить за выводом программы, можно убрать :)
Expand All @@ -37,10 +44,12 @@ func (c *crawler) run(ctx context.Context, url string, results chan<- crawlResul
return

default:
c.mutexMaxDepth.RLock()
// depth check: stop recursing once the (possibly runtime-raised) limit is hit
if depth >= c.maxDepth {
	// BUG FIX: the original returned while still holding the read lock,
	// which would block increaseMaxDepth's Lock() forever.
	c.mutexMaxDepth.RUnlock()
	return
}
c.mutexMaxDepth.RUnlock()

page, err := parse(url)
if err != nil {
Expand All @@ -55,9 +64,9 @@ func (c *crawler) run(ctx context.Context, url string, results chan<- crawlResul
links := pageLinks(nil, page)

// блокировка требуется, т.к. мы модифицируем мапку в несколько горутин
c.Lock()
c.mutexVisited.Lock()
c.visited[url] = title
c.Unlock()
c.mutexVisited.Unlock()

// отправляем результат в канал, не обрабатывая на месте
results <- crawlResult{
Expand All @@ -78,8 +87,8 @@ func (c *crawler) run(ctx context.Context, url string, results chan<- crawlResul
}

// checkVisited reports whether url has already been recorded in visited.
// NOTE(review): this is a pure read — mutexVisited could be an RWMutex with
// RLock here if read contention ever matters; confirm against write frequency.
func (c *crawler) checkVisited(url string) bool {
	c.mutexVisited.Lock()
	defer c.mutexVisited.Unlock()

	_, ok := c.visited[url]
	return ok
Expand Down
34 changes: 21 additions & 13 deletions l1/cmd/crawler/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,10 @@ func init() {

func main() {
started := time.Now()

ctx, cancel := context.WithCancel(context.Background())
go watchSignals(cancel)
defer cancel()

ctx, cancel := context.WithTimeout(context.Background(), time.Second*3)
crawler := newCrawler(depthLimit)
go watchSignals(cancel, crawler)
defer cancel()

// создаём канал для результатов
results := make(chan crawlResult)
Expand All @@ -67,18 +65,27 @@ func main() {
}

// ловим сигналы выключения
func watchSignals(cancel context.CancelFunc) {
osSignalChan := make(chan os.Signal)
func watchSignals(cancel context.CancelFunc, crawler *crawler) {
osSignalTerminatedChan := make(chan os.Signal)
osSignalUserChan := make(chan os.Signal)

signal.Notify(osSignalChan,
signal.Notify(osSignalTerminatedChan,
syscall.SIGINT,
syscall.SIGTERM)

sig := <-osSignalChan
log.Printf("got signal %q", sig.String())

// если сигнал получен, отменяем контекст работы
cancel()
signal.Notify(osSignalUserChan, syscall.SIGUSR1)
for {
select {
case sig := <-osSignalTerminatedChan:
log.Printf("got signal %q", sig.String())
// если сигнал получен, отменяем контекст работы
cancel()
return
case <-osSignalUserChan:
crawler.increaseMaxDepth(10)
log.Printf("got signal %d", crawler.maxDepth)
}
}
}

func watchCrawler(ctx context.Context, results <-chan crawlResult, maxErrors, maxResults int) chan struct{} {
Expand All @@ -93,6 +100,7 @@ func watchCrawler(ctx context.Context, results <-chan crawlResult, maxErrors, ma

case result := <-results:
if result.err != nil {
log.Println(result.err.Error())
maxErrors--
if maxErrors <= 0 {
log.Println("max errors exceeded")
Expand Down
2 changes: 1 addition & 1 deletion l1/go.mod
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
module github.com/kilchik/gb/lesson1
module github.com/Nibolg1994/gb/lesson1

go 1.15

Expand Down