Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
8c1bfe4
adding commits for updated python parser
a2un May 31, 2024
a171234
changes to parser and adding .gitignore
a2un Aug 2, 2024
1770bdc
TODO: update the logic in 14
a2un Oct 26, 2024
164a47e
update merge logic
a2un Oct 28, 2024
0779473
handle function call names for keywords
a2un Oct 28, 2024
846af6c
py owl
a2un Apr 30, 2025
9d38a67
new ontology
a2un May 6, 2025
4d2a95b
compare java ontology
a2un May 9, 2025
70c2139
file_converter.py
a2un May 23, 2025
097a5be
update parser name
Jun 26, 2025
bbfa154
update files
a2un Jun 26, 2025
7775a15
app dependencies
a2un Jun 26, 2025
11fec4e
add dependencies
a2un Jun 26, 2025
0df78e6
add dependencies
a2un Jun 26, 2025
74c6a1f
create folder
a2un Jun 26, 2025
c6f8b34
ontology; lab logo
a2un Jun 26, 2025
0e8cfe6
ontology; lab logo
a2un Jun 26, 2025
93bdd1d
concepts
a2un Jul 3, 2025
39f602b
requirements
a2un Jul 3, 2025
29b485d
main_api.py
a2un Jul 3, 2025
4a58316
Dockerfile
a2un Jul 3, 2025
28eed00
add attribute check Fix attribute checks (#1)
a2un Jul 9, 2025
0fd9311
py_keyword_functions fix issue (#1)
a2un Jul 9, 2025
4e013ea
update commands fix #1
a2un Jul 9, 2025
2303323
add curl outputs fix (#1)
a2un Jul 9, 2025
182ce66
update docker scripts Fix (#1)
a2un Jul 11, 2025
a4b569a
update readme docker script Fix (#1)
a2un Jul 11, 2025
9dfb95f
add elif else Fix #1
a2un Jul 11, 2025
4004b83
update the get method
a2un Jul 29, 2025
8eaab7d
image
a2un Aug 5, 2025
02a674c
make concepts readable; change from list to row format; match concept…
a2un Aug 20, 2025
082b91c
variable names for api should not be string and deselecting ones that…
a2un Aug 20, 2025
de6059f
update readme
a2un Aug 20, 2025
7426679
add um2 concepts
a2un Aug 20, 2025
556e083
add parameter calls
a2un Aug 20, 2025
886187f
handle nested if
a2un Aug 21, 2025
de9a0d3
add "for in" loop ast.name id
a2un Jan 19, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
pcex-python-code/
readingmirror-data-files/
chap*/
*.csv
pcex/
parsons/
quizpet/
py-files
temp_code/
*.json
*.pyc
*/__pycache__/*
__pycache__
11 changes: 11 additions & 0 deletions Dockerfile.fastapi
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
FROM python:3.9.22

WORKDIR /code

COPY ./requirements.txt /code/requirements.txt

RUN pip install --progress-bar off --no-cache-dir --upgrade -r /code/requirements.txt

COPY ./ /code/

CMD ["fastapi", "run", "main_api.py", "--port", "13456"]
13 changes: 13 additions & 0 deletions Dockerfile.streamlit
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
FROM python:3.9-slim

WORKDIR /code

COPY ./ /code

RUN pip install --progress-bar off --no-cache-dir --upgrade -r /code/requirements.txt

EXPOSE 13457

HEALTHCHECK CMD curl --fail http://localhost:13457/_stcore/health

ENTRYPOINT ["streamlit", "run", "streamlit_main.py", "--server.port=13457", "--server.address=0.0.0.0"]
34 changes: 34 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,37 @@
This Acos server tool parses Python 3 code and
returns the found concepts by traversing the AST
of the code.


## Requires
Python==3.9.22

## To run frontend
python3.9 -m streamlit run streamlit_main.py

## To run API
python3.9 -m uvicorn main_api:app --reload-include="main_api.py" --reload-exclude="*/py-files/*"

## Deployment on PAWSComp

```
sudo su
source setup_docker.sh
exit
```

Then to test

```
source test_docker.sh
```

Expected output:
```
[{"aggregate_id":{},"um2_activity_id":{},"aggregate_content_name":"tmp1","um2_concept_id":{},"aggregate_component_name":"Import","aggregate_context_name":"Import","aggregate_domain":"py","um2_aggregate_weight":1,"aggregate_active":1,"um2_direction":0,"aggregate_source_method":"Arun Parser v0.3","um2_concept_description":"Arun Parser v0.3","importance":0,"contributesK":0},{"aggregate_id":{},"um2_activity_id":{},"aggregate_content_name":"tmp1","um2_concept_id":{},"aggregate_component_name":"Alias","aggregate_context_name":"Alias","aggregate_domain":"py","um2_aggregate_weight":1,"aggregate_active":1,"um2_direction":0,"aggregate_source_method":"Arun Parser v0.3","um2_concept_description":"Arun Parser v0.3","importance":0,"contributesK":0}]
[{"aggregate_id":{},"um2_activity_id":{},"aggregate_content_name":"tmp1","um2_concept_id":{},"aggregate_component_name":"Import","aggregate_context_name":"Import","aggregate_domain":"py","um2_aggregate_weight":1,"aggregate_active":1,"um2_direction":0,"aggregate_source_method":"Arun Parser v0.3","um2_concept_description":"Arun Parser v0.3","importance":0,"contributesK":0},{"aggregate_id":{},"um2_activity_id":{},"aggregate_content_name":"tmp1","um2_concept_id":{},"aggregate_component_name":"Numeric-or-string-or-collection-assignment","aggregate_context_name":"Numeric-or-string-or-collection-assignment","aggregate_domain":"py","um2_aggregate_weight":1,"aggregate_active":1,"um2_direction":0,"aggregate_source_method":"Arun Parser v0.3","um2_concept_description":"Arun Parser v0.3","importance":0,"contributesK":0},{"aggregate_id":{},"um2_activity_id":{},"aggregate_content_name":"tmp1","um2_concept_id":{},"aggregate_component_name":"Int","aggregate_context_name":"Int","aggregate_domain":"py","um2_aggregate_weight":1,"aggregate_active":1,"um2_direction":0,"aggregate_source_method":"Arun Parser v0.3","um2_concept_description":"Arun Parser v0.3","importance":0,"contributesK":0},{"aggregate_id":{},"um2_activity_id":{},"aggregate_content_name":"tmp1","um2_concept_id":{},"aggregate_component_name":"Alias","aggregate_context_name":"Alias","aggregate_domain":"py","um2_aggregate_weight":1,"aggregate_active":1,"um2_direction":0,"aggregate_source_method":"Arun Parser v0.3","um2_concept_description":"Arun Parser v0.3","importance":0,"contributesK":0}]
[{"aggregate_id":{},"um2_activity_id":{},"aggregate_content_name":"tmp1","um2_concept_id":{},"aggregate_component_name":"Numeric-or-string-or-collection-assignment","aggregate_context_name":"Numeric-or-string-or-collection-assignment","aggregate_domain":"py","um2_aggregate_weight":1,"aggregate_active":1,"um2_direction":0,"aggregate_source_method":"Arun Parser v0.3","um2_concept_description":"Arun Parser v0.3","importance":0,"contributesK":0}]
[{"aggregate_id":{},"um2_activity_id":{},"aggregate_content_name":"tmp1","um2_concept_id":{},"aggregate_component_name":"Numeric-or-string-or-collection-assignment","aggregate_context_name":"Numeric-or-string-or-collection-assignment","aggregate_domain":"py","um2_aggregate_weight":1,"aggregate_active":1,"um2_direction":0,"aggregate_source_method":"Arun Parser v0.3","um2_concept_description":"Arun Parser v0.3","importance":0,"contributesK":0},{"aggregate_id":{},"um2_activity_id":{},"aggregate_content_name":"tmp1","um2_concept_id":{},"aggregate_component_name":"Slice","aggregate_context_name":"Slice","aggregate_domain":"py","um2_aggregate_weight":1,"aggregate_active":1,"um2_direction":0,"aggregate_source_method":"Arun Parser v0.3","um2_concept_description":"Arun Parser v0.3","importance":0,"contributesK":0},{"aggregate_id":{},"um2_activity_id":{},"aggregate_content_name":"tmp1","um2_concept_id":{},"aggregate_component_name":"Int","aggregate_context_name":"Int","aggregate_domain":"py","um2_aggregate_weight":1,"aggregate_active":1,"um2_direction":0,"aggregate_source_method":"Arun Parser v0.3","um2_concept_description":"Arun Parser v0.3","importance":0,"contributesK":0}]

ok
```
200 changes: 200 additions & 0 deletions concepts_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
# *****************************************************************************
# Python AST parser for the ADL project
# Version 0.1.0, Teemu Sirkia

# Reads a given Python program and creates a JSON object
# describing line-by-line which language elements exist
# in the code.
#
# For the list of the available nodes, see:
# https://docs.python.org/3/library/ast.html#abstract-grammar
# *****************************************************************************

import os
from os import path
from tqdm import tqdm
import pandas as pd
from utils import *
from datetime import datetime
from version import __version__


def main(local=True,filename='./py-files/chap2/sec_2_7.py',mode='simple'):
    """Parse a Python file into an AST and collect the language concepts found.

    Parameters
    ----------
    local : bool
        True  -> return the merged concept result to the in-process caller.
        False -> print the result as JSON to stdout (server/CLI mode) and
                 exit with status 1 on failure.
    filename : str
        Path of the Python source file to analyse.
    mode : str
        Traversal mode: 'simple', 'complex', 'hierarchical', 'concepts'
        or 'ast_walk'.  NOTE(review): read_input_files() also returns a
        mode, so the passed value may be overridden — confirm in utils.

    Returns
    -------
    merge_lines_nodes(nodes) when local is True (helper from utils; the
    __main__ block below unpacks it as (codelines, response) — its exact
    shape is not visible in this file).  Returns None when local is False.
    """
    #TODO 5 -- create table of concepts -- matching tags -- present example integration from concept to textbook
    #TODO 1 -- expressions with parentheses -- simple / complex expression -- section 2.7
    #TODO 2 -- operator overload -- section 2.9 -- instead of Add -- Consider StrAdd
    #TODO 3 -- input function -- section 2.10
    #TODO 4 -- comments

    # read_input_files comes from the star import of utils; it supplies the
    # raw source text, its lines, and the (possibly adjusted) parsing mode.
    code,codelines,mode = read_input_files(local,filename,mode)

    # Per-line node-name sets, keyed by line number, filled by the traversers.
    nodes = {'lines' : {}}

    try:

        tree = ast.parse(code)
        # tokens = lexer_tokens(code)

        # Root of the tree built in 'hierarchical' mode.
        startNode = {'name': 'root', 'children': []}

        # Traverse all the nodes in the AST

        if mode == 'complex':
            for node in ast.iter_child_nodes(tree):
                complexTraverse(node, 0, nodes)
        elif mode == 'hierarchical':
            for node in ast.iter_child_nodes(tree):
                hierarchicalTraverse(node, 0, startNode)
        elif mode in ('simple', 'concepts','ast_walk'):
            for node in ast.iter_child_nodes(tree):
                simpleTraverse(node, 0, nodes)
        else:
            print('Parsing failed!\n\nError occurred: Unknown parsing mode', file=sys.stderr)
            sys.exit(1)

        # Convert sets to lists before JSON transformation
        if mode == 'simple' or mode == 'complex':
            for line in nodes['lines']:
                nodes['lines'][line] = list(nodes['lines'][line])
        elif mode == 'hierarchical':
            nodes = startNode
        elif mode == 'concepts':
            # Flatten the per-line sets into one de-duplicated concept list.
            concepts = set()
            for line in nodes['lines']:
                for concept in list(nodes['lines'][line]):
                    concepts.add(concept)
            nodes = list(concepts)
        elif mode == 'ast_walk':
            # Debug mode: dump every node's attributes.  NOTE(review): the
            # 'concepts' set built here is never used.
            concepts = set()

            for node in ast.walk(tree):
                print(node.__dict__)

        if not(local):
            print(json.dumps(nodes))

        if local:
            return merge_lines_nodes(nodes)#.union(tokens)

    except Exception as e:
        # Best-effort reporting; in server/CLI mode a parse failure is fatal.
        print('Parsing failed!\n\nError occurred: ' + str(e), file=sys.stderr)
        # print(re.split(r'[<,\s,>,\']',str(type(e)))[3], f'line no {e.args[1][1]}')
        if not(local): sys.exit(1)

def post_process_parser(response,fname='tmp1.py',activity_id = pd.NA):
    """Format parsed concepts as aggregate.kc_content_component rows.

    Parameters
    ----------
    response : iterable of str
        Concept names produced by the parser (e.g. the result of main()).
    fname : str
        Content file name; its stem becomes aggregate_content_name.
    activity_id
        Activity id provided by the user (um2 activity id); defaults to pd.NA.

    Returns
    -------
    list[dict]
        One row per concept, ready for JSON serialisation / DB insert.
    """
    # Mapping of concept name -> um2 concept id (um2.ent_concept joined with
    # um2.rel_concept_activity, python only).  Use a context manager so the
    # handle is closed — the original json.load(open(...)) leaked it.
    with open('./static/um2_python_concept_ids.json', 'r') as fh:
        concept_ids = json.load(fh)

    content_name = path.splitext(fname)[0]
    section_concepts = []
    for concept in response:
        section_concepts.append(
            {
                # Leave blank for insert queries, otherwise the id column in
                # aggregate.kc_content_component / um2.ent_activity.
                'aggregate_id': pd.NA,
                'um2_activity_id': activity_id,
                'aggregate_content_name': content_name,
                # dict.get avoids the "x in d ... d[x]" double lookup.
                'um2_concept_id': concept_ids.get(concept, pd.NA),
                'aggregate_component_name': concept,
                'aggregate_context_name': concept,
                'aggregate_domain': 'py',
                'um2_aggregate_weight': 1,
                'aggregate_active': 1,
                'um2_direction': 0,
                'aggregate_source_method': f'Arun Parser v{__version__}',
                'um2_concept_description': f'Arun Parser v{__version__}',
                'importance': 0,
                'contributesK': 0
                # 'date_added': datetime.today().strftime('%Y-%m-%d %H:%M:%S')
            }
        )
    return section_concepts


if __name__ == '__main__':
    local = True
    ## TODO handle case for non local (such as server api setup)
    if local:
        # Directories of example programs to scan; SMART_CONTENT_LIST marks
        # the ones exported in the "smart content" CSV layout.
        PYTHON_TEXTBOOK_EXAMPLE_LIST = ['parsons','quizpet']#['pcex','py-files','pcex','pcex-python-code','quizpet','parsons']
        SMART_CONTENT_LIST = ['pcex','quizpet','pcex-python-code','parsons']

        for PYTHON_TEXTBOOK_EXAMPLES in PYTHON_TEXTBOOK_EXAMPLE_LIST:
            print("processing",PYTHON_TEXTBOOK_EXAMPLES)
            # Column-name -> values mapping, later turned into a DataFrame.
            section_concepts = {}
            if PYTHON_TEXTBOOK_EXAMPLES in SMART_CONTENT_LIST:
                section_concepts ['content_name']= []
            if PYTHON_TEXTBOOK_EXAMPLES == 'py-files':
                section_concepts['content_id'] = []
                section_concepts ['section_id']= []

            section_concepts['concept'] = []

            # Parse every .py file under the example directory.
            for root,curr_dir,files in os.walk(f'./{PYTHON_TEXTBOOK_EXAMPLES}/'):
                for fname in tqdm(files):
                    if path.splitext(fname)[1] == '.py':
                        try:
                            codelines,response = main(local,path.join(root,fname))
                            # print(response)
                            if PYTHON_TEXTBOOK_EXAMPLES == 'py-files':
                                # content ids are sequential, starting at 143
                                # (presumably the next free id in the target
                                # table — TODO confirm).
                                section_concepts['content_id'].append(section_concepts['content_id'][-1]+1 if len(section_concepts['content_id']) >0 else 143)
                                section_concepts['section_id'].append(path.splitext(fname)[0])
                            if PYTHON_TEXTBOOK_EXAMPLES in SMART_CONTENT_LIST:
                                section_concepts['content_name'].append(path.splitext(fname)[0])
                            # All concepts for one file are joined into a
                            # single '_'-separated string.
                            section_concepts['concept'].append('_'.join(list(response)))
                            # print(section_concepts)

                        except Exception:
                            # Best effort: report the failing file and move on.
                            print(root,fname)
                            # print(response['lines'])
                            # print(codelines)

            smart_concepts_sections = pd.DataFrame.from_dict(section_concepts)#.sort_values(by='section_id')
            # smart_concepts_sections.loc[:,'date_updated'] = pd.to_datetime('today')
            # smart_concepts_sections = smart_concepts_sections.explode('concept')

            # if PYTHON_TEXTBOOK_EXAMPLES == 'py-files':
            #     db = pd.read_csv('./readingmirror-data-files/smart_learning_content_section.csv')

            # if PYTHON_TEXTBOOK_EXAMPLES in SMART_CONTENT_LIST:
            #     db = pd.read_csv(f'./readingmirror-data-files/smart_learning_content_concepts.csv')

            # Timestamp used to make the output CSV names unique per run.
            timestamp = pd.to_datetime('today').strftime('%Y%m%d%H%M%S')

            if PYTHON_TEXTBOOK_EXAMPLES == 'py-files':
                smart_concepts_sections.loc[:,'resource_id'] = 'pfe'
                smart_concepts_sections.loc[:,'is_active'] = 1
                smart_concepts_sections.loc[:,'date_added'] = '2024-06-23 19:40:02'
                smart_concepts_sections.to_csv('./smart_learning_content_section.csv',index=False)

            if PYTHON_TEXTBOOK_EXAMPLES in SMART_CONTENT_LIST:
                # Constant metadata columns required by the smart-content schema.
                smart_concepts_sections.loc[:,'domain']='py'
                smart_concepts_sections.loc[:,'weight']=1
                smart_concepts_sections.loc[:,'active']=1
                smart_concepts_sections.loc[:,'source_method']='parser'
                smart_concepts_sections.loc[:,'importance']=1
                smart_concepts_sections.loc[:,'contributesK']=1
                smart_concepts_sections.loc[:,'component_name'] = smart_concepts_sections.loc[:,'concept']
                smart_concepts_sections.loc[:,'context_name'] = smart_concepts_sections.loc[:,'concept']
                # Drop the raw 'concept' column from the export.
                smart_concepts_sections[[x for x in smart_concepts_sections.columns if not(x == 'concept')]].to_csv(f'./smart_learning_content_concepts_{PYTHON_TEXTBOOK_EXAMPLES}_{timestamp}.csv',index=False)



# type: ignore


## TODO something from outcomes nothing beyond
## TODO why is it being allocated this way -- indexing mistake ?
## TODO all the worksexamples -- get the py





### Parser gives all the concepts -- new section / new concepts
### update the database for chapter sections
### update the smartcontent database
### filter in a separate -- no from future (before or present)
### filter smart content database -- no from future (before or present)




32 changes: 32 additions & 0 deletions file_creator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Splits concatenated example-code dumps into individual .py files:
#   ./parsons/parsons_codes.txt  (plain text, one 'ps_...' header per snippet)
#   ./quizpet/quizpet_codes.csv  (CSV with 'code' and 'rdfID' columns)

import pandas as pd
from tqdm import tqdm

# First pass: record the 1-based line numbers of the 'ps_' header lines.
line_numbers = []
with open('./parsons/parsons_codes.txt','r') as f:
    counter = 0
    for line in f.readlines():
        counter += 1
        if line.startswith('ps_'):
            line_numbers.append(counter)

# Second pass: slice out each snippet and write it to its own file.
with open('./parsons/parsons_codes.txt','r') as f:
    lines = f.readlines()
    for ind in tqdm(range(0,len(line_numbers))):
        line_number = line_numbers[ind]-1  # 0-based index of the header line
        code_filename = lines[line_number]
        # assumes one separator line after the header and two trailer lines
        # before the next header — TODO confirm against the dump format
        code = lines[line_number+2:line_numbers[ind+1]-2] if ind+1 < len(line_numbers) else lines[line_number+2:]

        # NOTE(review): this rebinds 'f' (the outer dump file); safe here
        # because 'lines' was already read, but fragile.
        with open(f'./parsons/{code_filename.strip()}.py','w+') as f:
            for line in code:
                f.write(line)



quizpet_codes = pd.read_csv('./quizpet/quizpet_codes.csv')


# NOTE(review): iterrows() yields (index, row) pairs, so 'row' is actually
# the index and 'ind' is the row Series; the names are swapped but the code
# works because only ind['code'] / ind['rdfID'] are accessed.
for row, ind in tqdm(quizpet_codes.iterrows()):
    code = ind['code']
    code_filename = ind['rdfID']
    with open(f'./quizpet/{code_filename}.py','w+') as f:
        f.write(code)
62 changes: 62 additions & 0 deletions main_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from concepts_parser import main, post_process_parser
import os
from version import __version__

# Single application instance; the route decorators below register on it.
app = FastAPI()

class CodeString(BaseModel):
    """
    Request body for /extract_concepts.

    Carries the Python source to analyse plus the identifiers used to
    label the resulting concept rows.
    This can be extended with validation if needed.
    """
    aggregate_id :int = 11111 ## Leave this blank if for insert query, otherwise provide id column in aggregate_kc_content_component
    um2_activity_id:int = 11111 ## Leave this blank if for insert query, otherwise provide id column in um2_ent_activity,
    aggregate_content_name:str = 'tmp' ### if use content_name as stored in um2_ent_activity,
    code_str:str  ## the raw Python source code to parse (required)
    # um2_concept_id : int ## this will be generated from backend
    # aggregate_component_name:str
    # aggregate_context_name: str
    # aggregate_domain:str = 'py' ## defaults to python
    # um2_aggregate_weight:int = 1
    # aggregate_active:int = 1
    # um2_direction: int = 0
    # aggregate_source_method:str = f'Arun Parser v{__version__}'
    # um2_concept_description:str = f'Arun Parser v{__version__}'
    # importance:int = 0
    # contributesK:int = 0

@app.get("/test_api")
async def test_api():
    """Liveness probe: confirm the API process is up and responding."""
    payload = dict(message="API is working!")
    return payload


@app.post("/extract_concepts")
async def extract_concepts(code_json: CodeString):
    """Parse the submitted Python source and return its concept rows.

    The parser works on files, so the code is written to a temporary file
    under ./py-files, parsed with main(), and the rows produced by
    post_process_parser() are returned.

    Raises
    ------
    HTTPException
        400 when no body was provided; 500 on any parse/processing failure.
    """
    print(code_json)

    if code_json is None:
        raise HTTPException(status_code=400, detail="Code string cannot be empty")

    # TODO: the fixed tmp1.py name races between concurrent requests;
    # consider tempfile.mkstemp(dir=...) per request.
    tmp_dir = './py-files'
    tmp_path = './py-files/tmp1.py'
    # Only remove the directory afterwards if this request created it —
    # the original unconditional os.rmdir raised (-> spurious 500) whenever
    # the directory pre-existed with other files in it.
    created_dir = not os.path.exists(tmp_dir)
    try:
        code_str = code_json.code_str.strip()

        if created_dir:
            os.mkdir(tmp_dir)

        # Write the code to a temporary file for the file-based parser.
        with open(tmp_path, 'w+') as f:
            f.write(code_str)

        # Extract concepts from the code.
        response = main(filename='py-files/tmp1.py')

        response_df = post_process_parser(response, code_json.aggregate_content_name, code_json.um2_activity_id)
        return response_df

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # Clean up even when parsing fails — the original only removed the
        # temp file on the success path and leaked it on error.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        if created_dir and os.path.exists(tmp_dir):
            os.rmdir(tmp_dir)
Loading