Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
f94d651
Bump ELM version
ppinchuk Oct 9, 2025
b266fde
No area values for coverage
ppinchuk Oct 9, 2025
b37795c
Add comment about commercial energy production systems
ppinchuk Oct 9, 2025
4470069
Add CSP to ignore list
ppinchuk Oct 9, 2025
5561bb0
Raise error to force re-try
ppinchuk Oct 10, 2025
830f9f6
Minor instruction update
ppinchuk Oct 10, 2025
0bb307f
Rename node
ppinchuk Oct 16, 2025
907a8e4
Update deps
ppinchuk Oct 18, 2025
dfd23ed
Pass tech down to legal text validator
ppinchuk Oct 19, 2025
82201e8
Add missing arg
ppinchuk Oct 19, 2025
301a4f2
Document new arg
ppinchuk Oct 19, 2025
888510c
Add todo reminder
ppinchuk Oct 19, 2025
fc16ab6
Refactor out system size message
ppinchuk Oct 19, 2025
84cb0f4
Minor bug fix
ppinchuk Oct 19, 2025
44f87c7
WES system reminder
ppinchuk Oct 19, 2025
9f8ed44
Update system size extraction
ppinchuk Oct 19, 2025
c004ffe
Minor prompt update
ppinchuk Oct 19, 2025
18fca84
Update district prompt
ppinchuk Oct 19, 2025
2def251
Update how clarification works for districts
ppinchuk Oct 19, 2025
9f68084
Add `prohibited use districts`
ppinchuk Oct 19, 2025
d388ce2
Pass down feature id
ppinchuk Oct 19, 2025
0f8813f
Fix typo
ppinchuk Oct 19, 2025
7780a97
Parse can return None
ppinchuk Oct 19, 2025
b592ca0
Update system size
ppinchuk Oct 19, 2025
929fc6e
Clarify lot size units
ppinchuk Oct 19, 2025
48c8c6b
Add parse feature
ppinchuk Oct 19, 2025
aba3e81
Break out density feature
ppinchuk Oct 19, 2025
116869f
Update clarification
ppinchuk Oct 19, 2025
44f425b
`feature` -> `feature_id`
ppinchuk Oct 19, 2025
8a00d3f
Add date instructions
ppinchuk Oct 19, 2025
75fc052
Clarify logger statements
ppinchuk Oct 19, 2025
c73988a
Soften prompt
ppinchuk Oct 19, 2025
c9fc542
Update for prohibited use type
ppinchuk Oct 19, 2025
5b2b38b
update prompts for permitted use district extractions
ppinchuk Oct 19, 2025
df7938d
Minor prompt update to match definition
ppinchuk Oct 19, 2025
4c83c2a
Minor prompt update
ppinchuk Oct 19, 2025
7d56c21
Update extra value extraction graphs
ppinchuk Oct 19, 2025
983c269
Minor formatting
ppinchuk Oct 19, 2025
71b7595
minor prompt update
ppinchuk Oct 19, 2025
b51ca38
Add node to check for in-effect ordinances
ppinchuk Oct 19, 2025
9105f99
Add public notice check
ppinchuk Oct 19, 2025
489e5f1
Update `is_draft` prompt
ppinchuk Oct 19, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions compass/common/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from .base import (
EXTRACT_ORIGINAL_TEXT_PROMPT,
SYSTEM_SIZE_REMINDER,
BaseTextExtractor,
empty_output,
llm_response_starts_with_no,
Expand Down
587 changes: 361 additions & 226 deletions compass/common/base.py

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions compass/extraction/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ async def check_for_ordinance_info(
doc,
model_config,
heuristic,
tech,
ordinance_text_collector_class,
permitted_use_text_collector_class=None,
usage_tracker=None,
Expand All @@ -36,6 +37,9 @@ async def check_for_ordinance_info(
``"contains_ord_info"`` key, it will not be processed. To force
a document to be processed by this function, remove that key
from the documents attrs.
tech : str
Technology of interest (e.g. "solar", "wind", etc). This is
used to set up some document validation decision trees.
text_splitter : obj
Instance of an object that implements a `split_text` method.
The method should take text as input (str) and return a list
Expand Down Expand Up @@ -65,6 +69,7 @@ async def check_for_ordinance_info(
chunks = model_config.text_splitter.split_text(doc.text)
chunk_parser = ParseChunksWithMemory(chunks, num_to_recall=2)
legal_text_validator = LegalTextValidator(
tech=tech,
llm_service=model_config.llm_service,
usage_tracker=usage_tracker,
doc_is_from_ocr=doc.attrs.get("from_ocr", False),
Expand Down
6 changes: 4 additions & 2 deletions compass/extraction/date.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,10 @@ def __init__(self, structured_llm_caller, text_splitter=None):
structured_llm_caller : compass.llm.StructuredLLMCaller
StructuredLLMCaller instance. Used for structured validation
queries.
text_splitter : langchain.text_splitter.TextSplitter, optional
Optional text splitter instance to attach to doc (used for
text_splitter : TextSplitter, optional
Optional
:class:`langchain_text_splitters.character.TextSplitter`
text splitter instance to attach to doc (used for
splitting out pages in an HTML document).
By default, ``None``.
"""
Expand Down
1 change: 1 addition & 0 deletions compass/extraction/solar/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,4 +47,5 @@
"renewable": 3,
"municipal": 1,
"department": 1,
# TODO: add board???
}
136 changes: 100 additions & 36 deletions compass/extraction/solar/graphs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
setup_graph_no_nodes,
llm_response_starts_with_yes,
llm_response_starts_with_no,
SYSTEM_SIZE_REMINDER,
)


Expand Down Expand Up @@ -45,22 +46,101 @@ def setup_graph_sef_types(**kwargs):
"What are the different solar energy farm sizes **regulated by "
"this ordinance**? List them in order of increasing size. "
"Include any relevant numerical qualifiers in the name, if "
"appropriate. Only include solar energy farm types; do not "
"include generic types or other energy system types."
"appropriate. Only include systems that resemble ground-mounted "
"solar energy farms; do not include other solar energy system "
"types like CSP or roof-mounted systems or other technologies "
"like wind energy systems, geothermal energy systems, etc."
),
)
G.add_edge("get_text", "final")
G.add_edge("get_text", "get_regulated")
G.add_node(
"final",
"get_regulated",
prompt=(
"Are any of these systems **not** regulated by this ordinance?"
),
)

G.add_edge("get_regulated", "get_largest")
G.add_node(
"get_largest",
prompt=(
"What is the **largest** solar energy farm size that **is "
"regulated by this ordinance**?"
),
)

G.add_edge("get_largest", "check_matches_definition")
G.add_node(
"check_matches_definition",
prompt=(
"Does the ordinance explicitly define this system as large, "
"commercial, utility-scale, or something akin to that? "
"Please start your response with either 'Yes' or 'No' and briefly "
"explain your answer."
),
)

G.add_edge(
"check_matches_definition",
"final_large",
condition=llm_response_starts_with_yes,
)
G.add_edge(
"check_matches_definition",
"check_scale_reason",
condition=llm_response_starts_with_no,
)
G.add_node(
"check_scale_reason",
prompt=(
"Would a reasonable person classify this kind of system as a "
"**large, commercial, or even utility-scale** solar energy farm "
"(e.g. with the primary purpose of generating electricity for "
"sale, as opposed to small, residential, roof-mounted, private, "
"or other kinds of 'small' systems)? "
"Please start your response with either 'Yes' or 'No' and briefly "
"explain your answer."
),
)

G.add_edge(
"check_scale_reason",
"final_large",
condition=llm_response_starts_with_yes,
)
G.add_edge(
"check_scale_reason",
"final_small",
condition=llm_response_starts_with_no,
)
G.add_node(
"final_large",
prompt=(
"Respond based on our entire conversation so far. Return your "
"answer as a dictionary in JSON format (not markdown). Your "
"JSON file must include exactly three keys. The keys are "
"'largest_sef_type' and 'explanation'. The value of the "
"'largest_sef_type' key should be a string that labels the "
"'largest_sef_type', 'explanation', and 'is_large'. The value "
"of the 'largest_sef_type' key should be a string that labels the "
"largest solar energy system size **regulated by this "
"ordinance**. The value of the 'explanation' key should be a "
"string containing a short explanation for your choice."
"string containing a short explanation for your choice. The value "
"of the 'is_large' key should be the boolean value `true`, since "
"we determined this is a large-scale system."
),
)
G.add_node(
"final_small",
prompt=(
"Respond based on our entire conversation so far. Return your "
"answer as a dictionary in JSON format (not markdown). Your "
"JSON file must include exactly three keys. The keys are "
"'largest_sef_type', 'explanation', and 'is_large'. The value "
"of the 'largest_sef_type' key should be a string that labels the "
"largest solar energy system size **regulated by this "
"ordinance**. The value of the 'explanation' key should be a "
"string containing a short explanation for your choice. The value "
"of the 'is_large' key should be the boolean value `false`, since "
"we determined this is not a large-scale system."
),
)
return G
Expand Down Expand Up @@ -90,14 +170,10 @@ def setup_multiplier(**kwargs):
"Does the text mention a multiplier that should be applied to the "
"structure height to compute the setback distance from {feature} "
"for {tech}? "
"Focus only on {feature}; do not respond based on any text "
"Please consider only {feature}; do not respond based on any text "
"related to {ignore_features}. "
"Please only consider setbacks specifically for systems that "
"would typically be defined as {tech} based on the text itself "
"— for example, systems intended for electricity generation or "
"sale, or those above thresholds such as height or rated "
"capacity. Ignore any requirements that apply only to smaller "
"or clearly non-commercial systems. "
"Please also only consider setbacks specifically for "
f"{SYSTEM_SIZE_REMINDER}"
"Please start your response with either 'Yes' or 'No' and briefly "
"explain your answer."
),
Expand All @@ -108,14 +184,10 @@ def setup_multiplier(**kwargs):
prompt=(
"Does the ordinance give the setback from {feature} as a fixed "
"distance value? "
"Focus only on {feature}; do not respond based on any text "
"related to {ignore_features}. "
"Please only consider setbacks specifically for systems that "
"would typically be defined as {tech} based on the text itself "
"— for example, systems intended for electricity generation or "
"sale, or those above thresholds such as height or rated "
"capacity. Ignore any requirements that apply only to smaller "
"or clearly non-commercial systems. "
"Please consider only {feature}; do not respond based on any "
"text related to {ignore_features}. "
"Please also only consider setbacks specifically for "
f"{SYSTEM_SIZE_REMINDER}"
"Please start your response with either 'Yes' or "
"'No' and briefly explain your answer."
),
Expand Down Expand Up @@ -156,8 +228,8 @@ def setup_multiplier(**kwargs):
"of the 'units' key should be a string corresponding to the "
"(standard) units of the setback distance value from {feature} "
"or `null` if there was no such value. "
"As before, focus only on setbacks specifically for systems that "
"would typically be defined as {tech} based on the text itself. "
"As before, focus only on setbacks that would apply for "
f"{SYSTEM_SIZE_REMINDER}"
"{SUMMARY_PROMPT} {SECTION_PROMPT}"
),
)
Expand All @@ -169,12 +241,8 @@ def setup_multiplier(**kwargs):
"Are multiple values given for the multiplier used to "
"compute the setback distance value from {feature} for {tech}? "
"Remember to ignore any text related to {ignore_features}. "
"Focus only on setbacks specifically for systems that would "
"typically be defined as {tech} based on the text itself — for "
"example, systems intended for electricity generation or sale, "
"or those above thresholds such as height or rated capacity. "
"Ignore any requirements that apply only to smaller or clearly "
"non-commercial systems. "
"Please only consider setbacks specifically for "
f"{SYSTEM_SIZE_REMINDER}"
"If so, select and state the largest one. Otherwise, repeat the "
"single multiplier value that was given in the text. "
),
Expand All @@ -187,12 +255,8 @@ def setup_multiplier(**kwargs):
"static distance value that should be added to the result of "
"the multiplication? "
"Remember to ignore any text related to {ignore_features}. "
"Focus only on setbacks specifically for systems that would "
"typically be defined as {tech} based on the text itself — for "
"example, systems intended for electricity generation or sale, "
"or those above thresholds such as height or rated capacity. "
"Ignore any requirements that apply only to smaller or clearly "
"non-commercial systems. "
"Please only consider setbacks specifically for "
f"{SYSTEM_SIZE_REMINDER}"
"Do not confuse this value with static setback requirements. "
"Ignore text with clauses such as 'no lesser than', 'no greater "
"than', 'the lesser of', or 'the greater of'. Please start your "
Expand Down
28 changes: 19 additions & 9 deletions compass/extraction/solar/ordinance.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,16 @@
"facilities (SEF), solar energy farms (SEF), solar farms (SF), "
"utility-scale solar energy systems (USES), commercial solar energy "
"systems (CSES), ground-mounted solar energy systems (GSES), "
"alternate energy systems (AES), or similar"
"alternate energy systems (AES), commercial energy production "
"systems (CEPCS), or similar"
)
_SEARCH_TERMS_AND = (
"zoning, siting, setback, system design, and operational "
"requirements/restrictions"
)
_SEARCH_TERMS_OR = _SEARCH_TERMS_AND.replace("and", "or")
_IGNORE_TYPES = (
"private, residential, roof-mounted, micro, small, or medium sized"
"CSP, private, residential, roof-mounted, micro, small, or medium sized"
)


Expand Down Expand Up @@ -175,7 +176,8 @@ def contains_ord_info(self):
def ordinance_text(self):
"""str: Combined ordinance text from the individual chunks"""
logger.debug(
"Grabbing %d chunk(s) from original text at these indices: %s",
"Grabbing %d ordinance chunk(s) from original text at these "
"indices: %s",
len(self._ordinance_chunks),
list(self._ordinance_chunks),
)
Expand Down Expand Up @@ -212,9 +214,12 @@ class SolarPermittedUseDistrictsTextCollector(StructuredLLMCaller):

DISTRICT_PROMPT = (
"You are a legal scholar that reads ordinance text and determines "
"whether the text explicitly details the districts where large "
"solar energy farms are a permitted use. Large solar energy systems "
f"(SES) may also be referred to as {_LARGE_SEF_SYNONYMS}. "
"whether it explicitly contains relevant information to determine the "
"districts (and especially the district names) where large solar "
"energy farms are a permitted use (primary, special, accessory, or "
"otherwise), as well as the districts where large solar energy farms "
"are prohibited entirely. Large solar energy systems (SES) may also "
f"be referred to as {_LARGE_SEF_SYNONYMS}. "
"Do not make any inferences; only answer based on information that "
"is explicitly stated in the text. "
"Note that relevant information may sometimes be found in tables. "
Expand Down Expand Up @@ -287,7 +292,8 @@ def contains_district_info(self):
def permitted_use_district_text(self):
"""str: Combined permitted use districts text from the chunks"""
logger.debug(
"Grabbing %d chunk(s) from original text at these indices: %s",
"Grabbing %d permitted use chunk(s) from original text at these "
"indices: %s",
len(self._district_chunks),
list(self._district_chunks),
)
Expand Down Expand Up @@ -333,6 +339,8 @@ class SolarOrdinanceTextExtractor(BaseTextExtractor):
f"\t{_LARGE_SEF_SYNONYMS.capitalize()}.\n"
"- Explicitly include any text related to **bans or prohibitions** "
"on solar energy systems.\n"
"- Explicitly include any text related to the adoption or enactment "
"date of the ordinance (if any).\n"
"\n2. ## Exclusions ##:\n"
"- Do **not** include text that does not pertain to solar energy "
"systems.\n"
Expand Down Expand Up @@ -426,7 +434,8 @@ class SolarPermittedUseDistrictsTextExtractor(BaseTextExtractor):
"\n1. ## Scope of Extraction ##:\n"
"- Retain all text defining permitted use(s) for a district, "
"including:\n"
"\t- **Primary, Special, Accessory, and other permitted use types.**\n"
"\t- **Primary, Special, Conditional, Accessory, Prohibited, and "
"any other use types.**\n"
"\t- **District names and zoning classifications.**\n"
"- Pay extra attention to any references to **solar energy "
"facilities** or related terms.\n"
Expand Down Expand Up @@ -471,7 +480,8 @@ class SolarPermittedUseDistrictsTextExtractor(BaseTextExtractor):
"\n1. ## Scope of Extraction ##:\n"
"- Retain all text defining permitted use(s) for a district, "
"including:\n"
"\t- **Primary, Special, Accessory, and other permitted use types.**\n"
"\t- **Primary, Special, Conditional, Accessory, Prohibited, and "
"any other use types.**\n"
"\t- **District names and zoning classifications.**\n"
"- Ensure that **tables, lists, and structured elements** are "
"preserved as they may contain relevant details.\n"
Expand Down
Loading