Skip to content

Article details #19

@littleyoda

Description

@littleyoda

Describe the bug

Currently, very little information is being returned regarding the article:

{
    "name": "Rasting H\u00e4hnchen-Minutenschnitzel",
    "id": "s1028337"
}

My implementation returns almost all available information as JSON.

Steps to reproduce the issue

from python_picnic_api2 import PicnicAPI
import json
import jq

# Shared API client used by the demo below. The credentials are placeholders;
# the third argument is presumably the country code -- confirm against the
# python_picnic_api2 documentation.
p = PicnicAPI("XXXX", "XXXXX", "de")

# jq program that flattens a product-details page response into one object.
#
# Each `def` below contributes a partial object; the main expression at the
# bottom merges them and renames the accordion icon keys (whisk, apple, list,
# infoCircle) to descriptive names.
#
# Optional values are bound as `([...] | .[0]) as $x` rather than
# `(...) as $x`: binding a generator that yields nothing makes the whole
# enclosing expression yield nothing, which would silently drop fields that
# were found (e.g. the product name when no quantity is present).
JQ_EXTRACTION_FILTER = r'''
# Extract accordion data
def accordion_data:
  [
    .body.child.child.children[]? |
    select(.id == "accordion-section")? |
    .children[]? |
    select(.id == "product-page-accordions")? |
    .pml.component.items[]? |
    (.header.child.children[]? | select(.type == "ICON" and has("iconKey"))? | .iconKey) as $icon |
    if $icon == "apple" then
      {
        iconKey: $icon,
        markdowns: [.body.child.children[]? | select(.type == "STACK") |
                    [.children[]? | select(.type == "RICH_TEXT" and has("markdown")) | .markdown] |
                    select(length > 0)]
      }
    else
      {
        iconKey: $icon,
        markdowns: [.body | .. | objects | select(.type == "RICH_TEXT" and has("markdown")) | .markdown]
      }
    end |
    select(.iconKey and (.markdowns | length > 0))
  ] |
  map({(.iconKey): .markdowns}) |
  add // {};

# Extract name data from main container
def name_data:
  (.. | objects | select(.id == "product-details-page-root-main-container") |
   .pml.component.children) as $children |

  if ($children | length) > 0 then
    # First child is expected to be the product name (RICH_TEXT with HEADER1).
    # Collected into an array so a non-matching first child gives null
    # instead of discarding the whole result.
    ([$children[0] | select(.type == "RICH_TEXT" and .textType == "HEADER1") | .markdown | gsub("#\\(#[0-9a-fA-F]+\\)"; "")] | .[0]) as $name |

    # Check if second child is manufacturer (RICH_TEXT with textAttributes, no textType) or skip if STACK
    (if ($children | length) > 1 and ($children[1].type == "RICH_TEXT") and (($children[1].textType // null) == null) and ($children[1] | has("textAttributes"))
     then ($children[1].markdown)
     else null end) as $manufacturer |

    # Find weight in STACK children. Take the first match, or null when there
    # is none, so a missing quantity no longer drops product and manufacturer.
    ([$children[] | select(.type == "STACK") | .children[]? | select(.type == "RICH_TEXT" and has("markdown")) | .markdown | gsub("#\\(#[0-9a-fA-F]+\\)"; "")] | .[0]) as $weight |

    {
      product: ($name // null),
      manufacturer: ($manufacturer // null),
      quantity: ($weight // null),
      name: (if ($manufacturer // null) != null then ($manufacturer + " " + ($name // "")) else ($name // null) end)
    }
  else {} end;

# Extract image IDs from main image container
def image_data:
  (.. | objects | select(.id == "product-page-image-gallery-main-image-container") |
   [.. | objects | select(.type == "IMAGE" and has("source")) | .source.id] ) as $image_ids |
  if ($image_ids | length) > 0 then {image_ids: $image_ids} else {} end;

# Extract selling_unit_id from analytics
def selling_unit_data:
  ([.. | objects | select(has("analytics")) | .analytics.contexts[]? | select(has("data")) | .data | select(has("selling_unit_id")) | .selling_unit_id] | unique) as $selling_unit_ids |
  if ($selling_unit_ids | length) > 0 then {selling_unit_id: ($selling_unit_ids[0] // null)} else {} end;

# Extract product description (optional)
def description_data:
  ([.. | objects | select(.id == "product-page-description") | .. | objects | select(.type == "RICH_TEXT" and has("markdown")) | .markdown]) as $description_markdowns |
  {"product-description": $description_markdowns};

# Extract categories from target URLs.
# Only string targets are tested; test() raises an error on any other type.
def category_data:
  ([.. | objects | select(has("target")) | .target | strings | select(test("app\\.picnic://categories/")) | capture("app\\.picnic://categories/(?<l1>[0-9]+)(/l2/(?<l2>[0-9]+))?(/l3/(?<l3>[0-9]+))?") | [.l1, .l2, .l3] | map(select(. != null)) | map(tonumber)] | unique) as $categories |
  if ($categories | length) > 0 then {"categories": ($categories[0] // [])} else {} end;

# Extract allergies from product-page-allergies
def allergies_data:
  ([.. | objects | select(.id == "product-page-allergies") | .. | objects | select(.type == "RICH_TEXT" and has("markdown")) | .markdown] | unique) as $allergies |
  if ($allergies | length) > 0 then {"allergies": $allergies} else {} end;

# Rename keys after extraction
(accordion_data // {}) + (name_data // {}) + (image_data // {}) + (selling_unit_data // {}) + (description_data // {}) + (category_data // {}) + (allergies_data // {}) |
with_entries(
  if .key == "whisk" then .key = "preparation"
  elif .key == "apple" then .key = "nutritional_values"
  elif .key == "list" then .key = "ingredients"
  elif .key == "infoCircle" then .key = "info"
  else .
  end
)
'''



def get_article_details(article_id):
    """Fetch an article's product-details page and flatten it with jq.

    Requests ``/pages/product-details-page-root`` through the module-level
    client ``p`` and runs ``JQ_EXTRACTION_FILTER`` over the response.

    Args:
        article_id: Article identifier, e.g. ``"s1028337"``.

    Returns:
        The first value produced by the filter if it is a non-empty dict,
        otherwise ``None`` (no output, empty output, or a non-dict output).
    """
    # Imported locally so the function does not rely on an extra file-level import.
    from urllib.parse import quote

    # Percent-encode the id so characters such as "&" or "#" cannot change
    # the meaning of the query string.
    encoded_id = quote(str(article_id), safe="")
    path = f"/pages/product-details-page-root?id={encoded_id}&show_category_action=true"
    data = p._get(path, add_picnic_headers=True)

    # Run the full extraction (accordions, name, images, categories, ...).
    compiled_filter = jq.compile(JQ_EXTRACTION_FILTER)

    # first() raises StopIteration when the filter produces no output at all.
    try:
        result = compiled_filter.input(data).first()
    except StopIteration:
        return None

    if isinstance(result, dict) and result:
        return result
    return None
  
# Sample article queried by both lookups below.
ARTICLE_ID = "s1028337"

# Baseline: what the library's own lookup returns.
print("Current implementation:")
print(json.dumps(p.get_article(ARTICLE_ID), indent=4))

# Proposed: the richer result of the jq-based extraction.
print("\nDetails:")
print(json.dumps(get_article_details(ARTICLE_ID), indent=4))

What did you expect to happen instead?

It would be great if you could somehow integrate the routine.

Python Version

No response

Metadata

Metadata

Assignees

No one assigned

    Labels

    enhancement: New feature or request

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions