forked from MikeBrink/python-picnic-api
-
Notifications
You must be signed in to change notification settings - Fork 3
Open
Labels
enhancement: New feature or request
Description
Describe the bug
Currently, `get_article` returns very little information about an article:
{
"name": "Rasting H\u00e4hnchen-Minutenschnitzel",
"id": "s1028337"
}
My implementation returns almost all available information as JSON.
Steps to reproduce the issue
from python_picnic_api2 import PicnicAPI
import json
import jq
# Shared API client used by the calls below (placeholder credentials, German store).
p = PicnicAPI("XXXX", "XXXXX", "de")
# jq program applied to the JSON of the product-details page.
#
# It defines one helper per concern and merges their outputs into one object:
#   accordion_data    -> markdown texts per accordion section, keyed by the
#                        section's iconKey
#   name_data         -> product, manufacturer, quantity, name
#   image_data        -> image_ids
#   selling_unit_data -> selling_unit_id
#   description_data  -> "product-description" (always present, may be empty)
#   category_data     -> categories (first l1/l2/l3 id path found)
#   allergies_data    -> allergies
# The final with_entries step renames the accordion keys:
#   whisk -> preparation, apple -> nutritional_values,
#   list -> ingredients, infoCircle -> info.
#
# The literal is a raw string so the backslashes in the regexes reach jq
# unchanged.
#
# NOTE(review): in name_data, $name and $weight are bound from generator
# expressions. Presumably a page whose first child is not a HEADER1 text, or
# that has no STACK text, makes name_data emit nothing (so all name fields are
# dropped), and several STACK texts make it emit several results -- confirm
# against real responses.
JQ_EXTRACTION_FILTER = r'''
# Extract accordion data
def accordion_data:
[
.body.child.child.children[]? |
select(.id == "accordion-section")? |
.children[]? |
select(.id == "product-page-accordions")? |
.pml.component.items[]? |
(.header.child.children[]? | select(.type == "ICON" and has("iconKey"))? | .iconKey) as $icon |
if $icon == "apple" then
{
iconKey: $icon,
markdowns: [.body.child.children[]? | select(.type == "STACK") |
[.children[]? | select(.type == "RICH_TEXT" and has("markdown")) | .markdown] |
select(length > 0)]
}
else
{
iconKey: $icon,
markdowns: [.body | .. | objects | select(.type == "RICH_TEXT" and has("markdown")) | .markdown]
}
end |
select(.iconKey and (.markdowns | length > 0))
] |
map({(.iconKey): .markdowns}) |
add // {};
# Extract name data from main container
def name_data:
(.. | objects | select(.id == "product-details-page-root-main-container") |
.pml.component.children) as $children |
if ($children | length) > 0 then
# First child is always the product name (RICH_TEXT with HEADER1)
($children[0] | select(.type == "RICH_TEXT" and .textType == "HEADER1") | .markdown | gsub("#\\(#[0-9a-fA-F]+\\)"; "")) as $name |
# Check if second child is manufacturer (RICH_TEXT with textAttributes, no textType) or skip if STACK
(if ($children | length) > 1 and ($children[1].type == "RICH_TEXT") and (($children[1].textType // null) == null) and ($children[1] | has("textAttributes"))
then ($children[1].markdown)
else null end) as $manufacturer |
# Find weight in STACK children
($children[] | select(.type == "STACK") | .children[]? | select(.type == "RICH_TEXT" and has("markdown")) | .markdown | gsub("#\\(#[0-9a-fA-F]+\\)"; "")) as $weight |
{
product: ($name // null),
manufacturer: ($manufacturer // null),
quantity: ($weight // null),
name: (if ($manufacturer // null) != null then ($manufacturer + " " + ($name // "")) else ($name // null) end)
}
else {} end;
# Extract image IDs from main image container
def image_data:
(.. | objects | select(.id == "product-page-image-gallery-main-image-container") |
[.. | objects | select(.type == "IMAGE" and has("source")) | .source.id] ) as $image_ids |
if ($image_ids | length) > 0 then {image_ids: $image_ids} else {} end;
# Extract selling_unit_id from analytics
def selling_unit_data:
([.. | objects | select(has("analytics")) | .analytics.contexts[]? | select(has("data")) | .data | select(has("selling_unit_id")) | .selling_unit_id] | unique) as $selling_unit_ids |
if ($selling_unit_ids | length) > 0 then {selling_unit_id: ($selling_unit_ids[0] // null)} else {} end;
# Extract product description (optional)
def description_data:
([.. | objects | select(.id == "product-page-description") | .. | objects | select(.type == "RICH_TEXT" and has("markdown")) | .markdown]) as $description_markdowns |
{"product-description": $description_markdowns};
# Extract categories from target URLs
def category_data:
([.. | objects | select(has("target")) | .target | select(test("app\\.picnic://categories/")) | capture("app\\.picnic://categories/(?<l1>[0-9]+)(/l2/(?<l2>[0-9]+))?(/l3/(?<l3>[0-9]+))?") | [.l1, .l2, .l3] | map(select(. != null)) | map(tonumber)] | unique) as $categories |
if ($categories | length) > 0 then {"categories": ($categories[0] // [])} else {} end;
# Extract allergies from product-page-allergies
def allergies_data:
([.. | objects | select(.id == "product-page-allergies") | .. | objects | select(.type == "RICH_TEXT" and has("markdown")) | .markdown] | unique) as $allergies |
if ($allergies | length) > 0 then {"allergies": $allergies} else {} end;
# Rename keys after extraction
(accordion_data // {}) + (name_data // {}) + (image_data // {}) + (selling_unit_data // {}) + (description_data // {}) + (category_data // {}) + (allergies_data // {}) |
with_entries(
if .key == "whisk" then .key = "preparation"
elif .key == "apple" then .key = "nutritional_values"
elif .key == "list" then .key = "ingredients"
elif .key == "infoCircle" then .key = "info"
else .
end
)
'''
def get_article_details(article_id):
    """Fetch an article's product-details page and extract its details.

    Requests ``/pages/product-details-page-root`` through the module-level
    client ``p`` and runs ``JQ_EXTRACTION_FILTER`` over the response.

    Args:
        article_id: Picnic article id, e.g. ``"s1028337"``.

    Returns:
        The dict produced by the jq filter, or ``None`` when the filter
        emits nothing, emits an empty value, or emits something that is
        not a dict.
    """
    # Function-scope import so the snippet needs no extra top-level import.
    from urllib.parse import quote

    # Percent-encode the id so characters such as "&" or "#" cannot alter
    # the query string.
    encoded_id = quote(str(article_id), safe="")
    path = f"/pages/product-details-page-root?id={encoded_id}&show_category_action=true"
    data = p._get(path, add_picnic_headers=True)

    compiled_filter = jq.compile(JQ_EXTRACTION_FILTER)
    try:
        # first() raises StopIteration when the program emits no result.
        result = compiled_filter.input(data).first()
    except StopIteration:
        return None

    if not isinstance(result, dict) or not result:
        return None
    return result
# Print the library's current output and the jq-based extraction for the same
# article, one after the other, so they can be compared.
SAMPLE_ARTICLE_ID = "s1028337"

print("Current implementation:")
data = p.get_article(SAMPLE_ARTICLE_ID)
current_json = json.dumps(data, indent=4)
print(current_json)

print("\nDetails:")
d = get_article_details(SAMPLE_ARTICLE_ID)
details_json = json.dumps(d, indent=4)
print(details_json)
What did you expect to happen instead?
It would be great if this routine could be integrated into the library.
Python Version
No response
Metadata
Metadata
Assignees
Labels
enhancement: New feature or request