Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions src/Application/Config.php
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ public function __construct(
private readonly ?PropertyId $itemTypeProperty = null,
private readonly ?FacetConfigList $facets = null,
private readonly ?array $icons = null,
public readonly bool $indexAllProperties = false
) {
}

Expand Down Expand Up @@ -73,4 +74,16 @@ public function isComplete(): bool {
return $this->itemTypeProperty !== null;
}

/**
* @return PropertyId[]
*/
public function getPropertiesWithFacetsForItemType( ItemId $itemType ): array {
return array_values(
array_map(
fn( FacetConfig $config ) => $config->propertyId,
$this->getFacetConfigForItemType( $itemType )
)
);
}

}
18 changes: 10 additions & 8 deletions src/Application/StatementListTranslator.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public function translateStatements( StatementList $statements ): array {
return [];
}

$propertyIds = $this->getPropertiesToIndex( $itemType );
$propertyIds = $this->getPropertiesToIndex( $itemType, $statements );

$values = [];

Expand All @@ -43,13 +43,15 @@ public function translateStatements( StatementList $statements ): array {
/**
* @return PropertyId[]
*/
private function getPropertiesToIndex( ItemId $itemType ): array {
$properties = array_values(
array_map(
fn( FacetConfig $config ) => $config->propertyId,
$this->config->getFacetConfigForItemType( $itemType )
)
);
private function getPropertiesToIndex( ItemId $itemType, StatementList $statements ): array {
if ( $this->config->indexAllProperties ) {
// TODO: is this sufficient, or do we need to end up explicitly indexing properties not present as empty?
$properties = $statements->getPropertyIds();
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As per https://github.com/ProfessionalWiki/WikibaseFacetedSearch/pull/51/files#r1901363923, it seems we should explicitly tell elastic "there is no value" for all properties not present in the statement list, to avoid keeping data for removed statements in the index.

So we need to replace this "IDs of all properties present in the statement list" with "IDs of all properties in the wiki"

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@malberts makes sense to you?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That sounds correct. And also relates to defining all properties in #272 (comment)

}
else {
$properties = $this->config->getPropertiesWithFacetsForItemType( $itemType );
}

$properties[] = $this->config->getItemTypeProperty();

return $properties;
Expand Down
1 change: 1 addition & 0 deletions src/Persistence/ConfigDeserializer.php
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ private function newConfig( array $configArray ): Config {
itemTypeProperty: $this->newPropertyId( $configArray['itemTypeProperty'] ?? null ),
facets: $this->newFacetConfigList( $configArray['configPerItemType'] ?? [] ),
icons: $this->newIconsList( $configArray['configPerItemType'] ?? [] ),
indexAllProperties: (bool)( $configArray['indexAllProperties'] ?? false ),
);
}

Expand Down
5 changes: 3 additions & 2 deletions src/config-example.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
{
"sitelinkSiteId": "enwiki",
"itemTypeProperty": "P42",
"configPerItemType": {
"Q100": {
Expand Down Expand Up @@ -28,5 +27,7 @@
}
}
}
}
},
"sitelinkSiteId": "enwiki",
"indexAllProperties": true
}
4 changes: 4 additions & 0 deletions src/config-schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@
"type": [ "string", "null" ],
"pattern": "^P[1-9]\\d{0,9}$"
},
"indexAllProperties": {
"type": "boolean",
"default": false
},
"configPerItemType": {
"type": "object",
"propertyNames": {
Expand Down
29 changes: 29 additions & 0 deletions templates/ConfigurationDocumentation.mustache
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,35 @@
}</pre>
</section>

<section>
<h3 id="IndexAllProperties">Index All Properties</h3>

<p>
By default the extension will index only values for properties for which facets are configured.
Via this setting, you can make the extension index values for all properties instead.
</p>

<p>
Indexing values for all properties is useful
when you want to be able to run structured queries for properties not shown in the UI. It is also
useful to avoid having to rebuild the search index whenever you add a new property to the UI.
Copy link
Contributor

@malberts malberts Jun 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add a new property to the UI.

I think this can be misunderstood. I get this refers to the faceted search UI, but I can see how this might look like it's referring to the general UI (including Wikibase).

Adding an existing property (already indexed) in the faceted search config works automatically. But when a new property is added on the wiki and the config, then the index must still be rebuilt.

The hook for defining the fields to be indexed is separate from the hook returning the data.

</p>

<p>
Downsides of indexing values for all properties include needing additional storage space for the
search index and increased time and CPU resources to (re)build said index.
</p>

<p>
Example configuration:
</p>

<pre>
{
"indexAllProperties": true
}</pre>
</section>

<section>
<h3 id="FullExample">{{msg-wikibase-faceted-search-config-help-example}}</h3>

Expand Down
21 changes: 21 additions & 0 deletions tests/phpunit/Application/ConfigTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -180,4 +180,25 @@ public function testGetIconForItemType(): void {
$this->assertNull( $config->getIconForItemType( $q3 ) );
}

public function testGetPropertiesWithFacetsForItemType(): void {
$config = new Config(
sitelinkSiteId: 'enwiki',
itemTypeProperty: new NumericPropertyId( 'P42' ),
facets: new FacetConfigList(
new FacetConfig( new ItemId( 'Q100' ), new NumericPropertyId( 'P2' ), FacetType::LIST ),
new FacetConfig( new ItemId( 'Q200' ), new NumericPropertyId( 'P3' ), FacetType::LIST ),
new FacetConfig( new ItemId( 'Q100' ), new NumericPropertyId( 'P4' ), FacetType::LIST ),
new FacetConfig( new ItemId( 'Q200' ), new NumericPropertyId( 'P5' ), FacetType::LIST ),
)
);

$this->assertEquals(
[
new NumericPropertyId( 'P2' ),
new NumericPropertyId( 'P4' ),
],
$config->getPropertiesWithFacetsForItemType( new ItemId( 'Q100' ) )
);
}

}
51 changes: 49 additions & 2 deletions tests/phpunit/Application/StatementListTranslatorTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,12 @@ public function testTranslatesStatementsForAllConfiguredProperties(): void {
$itemType = new ItemId( 'Q100' );
$propertyP1 = new NumericPropertyId( 'P100' );
$propertyP2 = new NumericPropertyId( 'P200' );
$propertyP3 = new NumericPropertyId( 'P300' );

$statements = new StatementList(
$this->newStatement( $propertyP1, new StringValue( 'unimportant' ) ),
$this->newStatement( $propertyP2, new StringValue( 'unimportant' ) ),
$this->newStatement( $propertyP3, new StringValue( 'unimportant' ) ),
);

$config = new Config(
Expand All @@ -78,17 +80,22 @@ public function testTranslatesStatementsForAllConfiguredProperties(): void {
type: FacetType::LIST
),
new FacetConfig(
itemType: $itemType,
itemType: new ItemId( 'Q404' ), // Different item type, so not indexed
propertyId: $propertyP2,
type: FacetType::LIST
),
new FacetConfig(
itemType: $itemType,
propertyId: $propertyP3,
type: FacetType::LIST
),
)
);

$this->assertEquals(
[
'wbfs_P100' => [ 'translated value' ],
'wbfs_P200' => [ 'translated value' ],
'wbfs_P300' => [ 'translated value' ],
'wbfs_P42' => [],
],
$this->newTranslator(
Expand Down Expand Up @@ -131,4 +138,44 @@ public function testFiltersNullValues(): void {
);
}

public function testTranslatesAllStatementsWhenIndexAllPropertiesIsTrue(): void {
$propertyP1 = new NumericPropertyId( 'P100' );
$propertyP2 = new NumericPropertyId( 'P200' );

$statements = new StatementList(
$this->newStatement( $propertyP1, new StringValue( 'unimportant' ) ),
$this->newStatement( $propertyP2, new StringValue( 'unimportant' ) ),
);

$config = new Config(
itemTypeProperty: new NumericPropertyId( 'P42' ),
facets: new FacetConfigList(
new FacetConfig(
itemType: new ItemId( 'Q100' ),
propertyId: $propertyP1,
type: FacetType::LIST
),
new FacetConfig(
itemType: new ItemId( 'Q200' ),
propertyId: $propertyP2,
type: FacetType::LIST
),
),
indexAllProperties: true
);

$this->assertEquals(
[
'wbfs_P100' => [ 'translated value' ],
'wbfs_P200' => [ 'translated value' ],
'wbfs_P42' => [],
],
$this->newTranslator(
statementTranslator: new StubStatementTranslator( 'translated value' ),
itemTypeExtractor: new StubItemTypeExtractor( new ItemId( 'Q300' ) ),
config: $config
)->translateStatements( $statements )
);
}

}
26 changes: 26 additions & 0 deletions tests/phpunit/Persistence/ConfigDeserializerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -146,4 +146,30 @@ public function testDefaultsToEmptyTypeSpecificConfig(): void {
);
}

public function testCanSetIndexAllProperties(): void {
$config = $this->newDeserializer()->deserialize( '{
"indexAllProperties": true
}
' );

$this->assertTrue( $config->indexAllProperties );
}

public function testDefaultsToNotIndexAllProperties(): void {
$config = $this->newDeserializer()->deserialize( '{
}
' );

$this->assertFalse( $config->indexAllProperties );
}

public function testCanSetIndexAllPropertiesToFalse(): void {
$config = $this->newDeserializer()->deserialize( '{
"indexAllProperties": false
}
' );

$this->assertFalse( $config->indexAllProperties );
}

}
Loading