Skip to content

Commit 480ff12

Browse files
committed
Merge branch 'release/1.0.0' into main
2 parents c6da078 + 6a9a6d5 commit 480ff12

File tree

8 files changed

+152
-22
lines changed

8 files changed

+152
-22
lines changed

.github/workflows/build.yml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
matrix:
1717
os: [ 'ubuntu-latest' ]
1818
php: [ '8.0', '8.1' ]
19-
symfony_version: [ '5.3.*', '5.4.*', '6.0.*' ]
19+
symfony_version: [ '6.0.*' ]
2020
dependency-version: [ 'prefer-lowest', 'prefer-stable' ]
2121

2222
runs-on: ${{ matrix.os }}
@@ -65,7 +65,6 @@ jobs:
6565
run: vendor/bin/simple-phpunit install
6666

6767
- name: Run psalm
68-
if: matrix.symfony_version != '5.3.*'
6968
run: vendor/bin/psalm
7069

7170
- name: Run tests with phpunit

.php-cs-fixer.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1337,6 +1337,7 @@
13371337
'phpdoc_to_comment' => [
13381338
'ignored_tags' => [
13391339
'noinspection',
1340+
'psalm-suppress',
13401341
],
13411342
],
13421343

README.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,12 @@ Add `nelexa/roach-php-bundle` to your composer.json file:
2020
composer require nelexa/roach-php-bundle
2121
```
2222

23+
## Versions & Dependencies
24+
| Bundle version | roach-php/core version | Symfony version | PHP version(s) |
25+
|----------------|------------------------|-----------------|----------------|
26+
| 0.3.0 | 0.3.0 | ^5.3 \| ^6.0 | >= 8.0 |
27+
| 1.0.0 | 1.0.0 | ^6.0 | >= 8.0 |
28+
2329
#### Register the bundle:
2430
Register bundle into config/bundles.php (Flex did it automatically):
2531
```php
@@ -58,6 +64,11 @@ php bin/console roach:php google --concurrency 8 --delay 2
5864
```
5965
These options override the `$concurrency` and `$requestDelay` public properties of your spider.
6066

67+
Add the `--output` (`-o`) option and you can save the collected data to a JSON file.
68+
```bash
69+
php bin/console roach:php google --output 'path/to/data.json'
70+
```
71+
6172
### Starting the REPL
6273

6374
Roach ships with an [interactive shell](https://roach-php.dev/docs/repl) (often called a Read-Eval-Print Loop, or REPL for short) which makes prototyping our spiders a breeze. We can use the provided `roach:shell` command to launch a new REPL session.

composer.json

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,18 @@
44
"type": "symfony-bundle",
55
"require": {
66
"php": ">= 8.0",
7-
"roach-php/core": "^0.3.0",
8-
"symfony/config": "^5.3 | ^6.0",
9-
"symfony/dependency-injection": "^5.3 | ^6.0",
10-
"symfony/http-kernel": "^5.3 | ^6.0",
11-
"symfony/console": "^5.3 | ^6.0"
7+
"roach-php/core": "^1.0",
8+
"symfony/config": "^6.0",
9+
"symfony/dependency-injection": "^6.0",
10+
"symfony/http-kernel": "^6.0",
11+
"symfony/console": "^6.0",
12+
"symfony/serializer": "^6.0"
1213
},
1314
"require-dev": {
1415
"roave/security-advisories": "dev-latest",
15-
"symfony/phpunit-bridge": "^5.3 | ^6.0",
16-
"symfony/var-dumper": "^5.3 | ^6.0",
17-
"symfony/framework-bundle": "^5.3 | ^6.0",
16+
"symfony/phpunit-bridge": "^6.0",
17+
"symfony/var-dumper": "^6.0",
18+
"symfony/framework-bundle": "^6.0",
1819
"symfony/maker-bundle": "^1.37",
1920
"vimeo/psalm": "^4.21",
2021
"psalm/plugin-symfony": "^3.1",
@@ -37,5 +38,8 @@
3738
"name": "Ne-Lexa",
3839
"email": "alexey@nelexa.ru"
3940
}
40-
]
41+
],
42+
"suggest": {
43+
"spatie/browsershot": "Required to execute Javascript in spiders"
44+
}
4145
}

src/Command/RunSpiderCommand.php

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323
use Symfony\Component\Console\Style\OutputStyle;
2424
use Symfony\Component\Console\Style\SymfonyStyle;
2525
use Symfony\Component\DependencyInjection\ServiceLocator;
26+
use Symfony\Component\Serializer\Encoder\JsonEncode;
27+
use Symfony\Component\Serializer\SerializerInterface;
2628

2729
final class RunSpiderCommand extends Command
2830
{
@@ -33,7 +35,7 @@ final class RunSpiderCommand extends Command
3335
/** @var array<class-string<\RoachPHP\Spider\SpiderInterface>, array<string>> */
3436
private array $spiderNames;
3537

36-
public function __construct(private ServiceLocator $serviceLocator)
38+
public function __construct(private ServiceLocator $serviceLocator, private SerializerInterface $serializer)
3739
{
3840
/** @var array<class-string<\RoachPHP\Spider\SpiderInterface>> $providedServices */
3941
$providedServices = $this->serviceLocator->getProvidedServices();
@@ -55,6 +57,7 @@ protected function configure(): void
5557
->addArgument('spider', InputArgument::OPTIONAL, rtrim($spiderArgDescription))
5658
->addOption('delay', 't', InputOption::VALUE_OPTIONAL, 'The delay (in seconds) between requests.')
5759
->addOption('concurrency', 'p', InputOption::VALUE_OPTIONAL, 'The number of concurrent requests.')
60+
->addOption('output', 'o', InputOption::VALUE_OPTIONAL, 'Save to JSON file')
5861
;
5962
}
6063

@@ -107,6 +110,8 @@ private function selectSpiderClassName(OutputStyle $io): string
107110

108111
protected function execute(InputInterface $input, OutputInterface $output): int
109112
{
113+
$io = new SymfonyStyle($input, $output);
114+
$outputFilename = $input->getOption('output');
110115
$spiderName = $input->getArgument('spider');
111116
$spiderClassName = $this->findSpiderClass($spiderName);
112117

@@ -135,7 +140,15 @@ protected function execute(InputInterface $input, OutputInterface $output): int
135140
requestDelay: $delay,
136141
);
137142

138-
Roach::startSpider($spiderClassName, $overrides);
143+
if ($outputFilename !== null) {
144+
$collectData = Roach::collectSpider($spiderClassName, $overrides);
145+
146+
if (!$this->saveCollectData($collectData, $outputFilename, $io)) {
147+
return self::FAILURE;
148+
}
149+
} else {
150+
Roach::startSpider($spiderClassName, $overrides);
151+
}
139152

140153
return self::SUCCESS;
141154
}
@@ -155,4 +168,29 @@ private function findSpiderClass(?string $spiderName): ?string
155168

156169
return null;
157170
}
171+
172+
/**
 * Serializes the collected data to pretty-printed JSON and writes it to a file.
 *
 * The parent directory of the target file is created recursively when it does
 * not exist yet. Every failure is reported through $io and signalled by the
 * return value instead of an exception.
 *
 * @param array        $collectData    data returned by Roach::collectSpider()
 * @param string       $outputFilename path of the JSON file to write
 * @param SymfonyStyle $io             console style used for the success/error messages
 *
 * @return bool true when the file was written, false otherwise
 */
private function saveCollectData(array $collectData, string $outputFilename, SymfonyStyle $io): bool
{
    $content = $this->serializer->serialize($collectData, 'json', [
        JsonEncode::OPTIONS => \JSON_UNESCAPED_UNICODE | \JSON_PRETTY_PRINT | \JSON_UNESCAPED_LINE_TERMINATORS | \JSON_UNESCAPED_SLASHES | \JSON_THROW_ON_ERROR,
    ]);

    $dirname = \dirname($outputFilename);

    // The trailing is_dir() re-check covers the case where mkdir() fails
    // because the directory appeared between the first check and the call.
    if (!is_dir($dirname) && !mkdir($dirname, 0755, true) && !is_dir($dirname)) {
        $io->error(sprintf('Directory "%s" was not created', $dirname));

        return false;
    }

    if (file_put_contents($outputFilename, $content) === false) {
        // Report the file that could not be written, not its parent directory.
        $io->error(sprintf('An error occurred while saving output to file %s', $outputFilename));

        return false;
    }

    $io->success(sprintf('Collected data successfully saved to file %s', $outputFilename));

    return true;
}
158196
}

src/Normalizer/ItemNormalizer.php

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
/*
6+
* Copyright (c) 2022 Ne-Lexa <alexey@nelexa.ru>
7+
*
8+
* For the full copyright and license information, please view
9+
* the LICENSE file that was distributed with this source code.
10+
*
11+
* @see https://github.com/Ne-Lexa/roach-php-bundle
12+
*/
13+
14+
namespace Nelexa\RoachPhpBundle\Normalizer;
15+
16+
use RoachPHP\ItemPipeline\ItemInterface;
17+
use Symfony\Component\Serializer\Normalizer\CacheableSupportsMethodInterface;
18+
use Symfony\Component\Serializer\Normalizer\NormalizerInterface;
19+
20+
/**
 * Serializer normalizer that turns Roach items into plain arrays.
 */
class ItemNormalizer implements NormalizerInterface, CacheableSupportsMethodInterface
{
    /**
     * The result of supportsNormalization() depends only on the type of the
     * data (never on format or context), so it is declared cacheable.
     */
    public function hasCacheableSupportsMethod(): bool
    {
        return true;
    }

    /**
     * Only instances of ItemInterface are supported, regardless of the format.
     */
    public function supportsNormalization(mixed $data, ?string $format = null): bool
    {
        return $data instanceof ItemInterface;
    }

    /**
     * Returns the item's data as provided by ItemInterface::all().
     *
     * @throws \Symfony\Component\Serializer\Exception\InvalidArgumentException when $object is not an ItemInterface
     */
    public function normalize(mixed $object, ?string $format = null, array $context = []): array
    {
        // Explicit guard instead of a blanket psalm suppression: narrows the
        // type for static analysis and fails with a descriptive exception if
        // the normalizer is called directly with unsupported data.
        if (!$object instanceof ItemInterface) {
            throw new \Symfony\Component\Serializer\Exception\InvalidArgumentException(sprintf('The object must implement "%s", "%s" given.', ItemInterface::class, get_debug_type($object)));
        }

        return $object->all();
    }
}

src/Resources/config/services.php

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,16 @@
8484
service('event_dispatcher'),
8585
])
8686
;
87+
$services->alias(\RoachPHP\Core\EngineInterface::class, \RoachPHP\Core\Engine::class);
88+
89+
$services
90+
->set(\RoachPHP\Core\Runner::class)
91+
->args([
92+
service('service_container'),
93+
service(\RoachPHP\Core\EngineInterface::class),
94+
])
95+
;
96+
$services->alias(\RoachPHP\Core\RunnerInterface::class, \RoachPHP\Core\Runner::class);
8797

8898
// Downloader and downloader middlewares
8999
$services
@@ -125,6 +135,7 @@
125135
->set(\Nelexa\RoachPhpBundle\Command\RunSpiderCommand::class)
126136
->args([
127137
tagged_locator('roach_php.spider'),
138+
service('serializer'),
128139
])
129140
->tag('console.command')
130141
;
@@ -162,4 +173,10 @@
162173
->set(\Nelexa\RoachPhpBundle\Maker\Spider\MakeItemMiddleware::class)
163174
->tag('maker.command')
164175
;
176+
177+
// normalizers
178+
$services
179+
->set(\Nelexa\RoachPhpBundle\Normalizer\ItemNormalizer::class)
180+
->tag('serializer.normalizer', [])
181+
;
165182
};

tests/Command/RunSpiderCommandTest.php

Lines changed: 32 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,14 @@
1515

1616
use Symfony\Bundle\FrameworkBundle\Console\Application;
1717
use Symfony\Bundle\FrameworkBundle\Test\KernelTestCase;
18-
use Symfony\Component\Console\Command\Command;
1918
use Symfony\Component\Console\Tester\CommandTester;
2019

2120
/**
2221
* @internal
2322
*
2423
* @medium
2524
*/
26-
class RunSpiderCommandTest extends KernelTestCase
25+
final class RunSpiderCommandTest extends KernelTestCase
2726
{
2827
public function testExecute(): void
2928
{
@@ -36,11 +35,7 @@ public function testExecute(): void
3635
'spider' => 'quotes',
3736
]);
3837

39-
if (method_exists($commandTester, 'assertCommandIsSuccessful')) {
40-
$commandTester->assertCommandIsSuccessful();
41-
} else {
42-
static::assertSame(Command::SUCCESS, $commandTester->getStatusCode());
43-
}
38+
$commandTester->assertCommandIsSuccessful();
4439
}
4540

4641
public function testUnknownSpider(): void
@@ -54,7 +49,35 @@ public function testUnknownSpider(): void
5449
'spider' => 'unknown_spider',
5550
]);
5651

57-
static::assertNotSame(0, $commandTester->getStatusCode());
58-
static::assertStringContainsString('[ERROR] Unknown spider unknown_spider', $commandTester->getDisplay());
52+
self::assertNotSame(0, $commandTester->getStatusCode());
53+
self::assertStringContainsString('[ERROR] Unknown spider unknown_spider', $commandTester->getDisplay());
54+
}
55+
56+
/**
 * Runs the "quotes" spider with the --output option and verifies that a
 * non-empty, valid JSON file is written to the requested location.
 */
public function testSpiderCommandOutputToJsonFile(): void
{
    // Unique file name so parallel runs, or a file left behind by an aborted
    // run, cannot interfere with this test.
    $outputFilename = sprintf('%s/_roach-export-%s.json', sys_get_temp_dir(), bin2hex(random_bytes(8)));

    $kernel = self::bootKernel();
    $application = new Application($kernel);

    $command = $application->find('roach:run');

    try {
        $commandTester = new CommandTester($command);
        $commandTester->execute([
            'spider' => 'quotes',
            '--output' => $outputFilename,
        ]);

        $commandTester->assertCommandIsSuccessful();

        self::assertFileExists($outputFilename);

        // file_get_contents() returns false on failure; fail with a clear
        // assertion instead of passing false on to json_decode().
        $content = file_get_contents($outputFilename);
        self::assertIsString($content);

        $json = json_decode($content, false, 512, \JSON_THROW_ON_ERROR);
        self::assertNotEmpty($json);
    } finally {
        // Always remove the export so the temp directory stays clean.
        if (is_file($outputFilename)) {
            unlink($outputFilename);
        }
    }
}
6083
}

0 commit comments

Comments
 (0)