Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
8c1bfe4
adding commits for updated python parser
a2un May 31, 2024
a171234
changes to parser and adding .gitignore
a2un Aug 2, 2024
1770bdc
TODO: update the logic in 14
a2un Oct 26, 2024
164a47e
update merge logic
a2un Oct 28, 2024
0779473
handle function call names for keywords
a2un Oct 28, 2024
846af6c
py owl
a2un Apr 30, 2025
9d38a67
new ontology
a2un May 6, 2025
4d2a95b
compare java ontology
a2un May 9, 2025
70c2139
file_converter.py
a2un May 23, 2025
097a5be
update parser name
Jun 26, 2025
bbfa154
update files
a2un Jun 26, 2025
7775a15
app dependencies
a2un Jun 26, 2025
11fec4e
add dependencies
a2un Jun 26, 2025
0df78e6
add dependencies
a2un Jun 26, 2025
74c6a1f
create folder
a2un Jun 26, 2025
c6f8b34
ontology; lab logo
a2un Jun 26, 2025
0e8cfe6
ontology; lab logo
a2un Jun 26, 2025
93bdd1d
concepts
a2un Jul 3, 2025
39f602b
requirements
a2un Jul 3, 2025
29b485d
main_api.py
a2un Jul 3, 2025
4a58316
Dockerfile
a2un Jul 3, 2025
28eed00
add attribute check Fix attribute checks (#1)
a2un Jul 9, 2025
0fd9311
py_keyword_functions fix issue (#1)
a2un Jul 9, 2025
4e013ea
update commands fix #1
a2un Jul 9, 2025
2303323
add curl outputs fix (#1)
a2un Jul 9, 2025
182ce66
update docker scripts Fix (#1)
a2un Jul 11, 2025
a4b569a
update readme docker script Fix (#1)
a2un Jul 11, 2025
9dfb95f
add elif else Fix #1
a2un Jul 11, 2025
4004b83
update the get method
a2un Jul 29, 2025
8eaab7d
image
a2un Aug 5, 2025
02a674c
make concepts readable; change from list to row format; match concept…
a2un Aug 20, 2025
082b91c
variable names for api should not be string and deselecting ones that…
a2un Aug 20, 2025
de6059f
update readme
a2un Aug 20, 2025
7426679
add um2 concepts
a2un Aug 20, 2025
556e083
add parameter calls
a2un Aug 20, 2025
886187f
handle nested if
a2un Aug 21, 2025
de9a0d3
add "for in" loop ast.name id
a2un Jan 19, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
pcex-python-code/
readingmirror-data-files/
chap*/
*.csv
pcex/
parsons/
quizpet/
py-files
temp_code/
*.json
*.pyc
*/__pycache__/*
__pycache__
11 changes: 11 additions & 0 deletions Dockerfile.fastapi
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
FROM python:3.9.22

WORKDIR /code

COPY ./requirements.txt /code/requirements.txt

RUN pip install --progress-bar off --no-cache-dir --upgrade -r /code/requirements.txt

COPY ./ /code/

CMD ["fastapi", "run", "main_api.py", "--port", "13456"]
13 changes: 13 additions & 0 deletions Dockerfile.streamlit
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
FROM python:3.9-slim

WORKDIR /code

COPY ./ /code

RUN pip install --progress-bar off --no-cache-dir --upgrade -r /code/requirements.txt

EXPOSE 13457

HEALTHCHECK CMD curl --fail http://localhost:13457/_stcore/health

ENTRYPOINT ["streamlit", "run", "streamlit_main.py", "--server.port=13457", "--server.address=0.0.0.0"]
34 changes: 34 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,37 @@
This Acos server tool parses Python 3 code and
returns the found concepts by traversing the AST
of the code.


## Requires
Python==3.9.22

## To run frontend
python3.9 -m streamlit run streamlit_main.py

## To run API
python3.9 -m uvicorn main_api:app --reload-include="main_api.py" --reload-exclude="*/py-files/*"

## Deployment on PAWSComp

```
sudo su
source setup_docker.sh
exit
```

Then to test

```
source test_docker.sh
```

Expected output:
```
[{"aggregate_id":{},"um2_activity_id":{},"aggregate_content_name":"tmp1","um2_concept_id":{},"aggregate_component_name":"Import","aggregate_context_name":"Import","aggregate_domain":"py","um2_aggregate_weight":1,"aggregate_active":1,"um2_direction":0,"aggregate_source_method":"Arun Parser v0.3","um2_concept_description":"Arun Parser v0.3","importance":0,"contributesK":0},{"aggregate_id":{},"um2_activity_id":{},"aggregate_content_name":"tmp1","um2_concept_id":{},"aggregate_component_name":"Alias","aggregate_context_name":"Alias","aggregate_domain":"py","um2_aggregate_weight":1,"aggregate_active":1,"um2_direction":0,"aggregate_source_method":"Arun Parser v0.3","um2_concept_description":"Arun Parser v0.3","importance":0,"contributesK":0}]
[{"aggregate_id":{},"um2_activity_id":{},"aggregate_content_name":"tmp1","um2_concept_id":{},"aggregate_component_name":"Import","aggregate_context_name":"Import","aggregate_domain":"py","um2_aggregate_weight":1,"aggregate_active":1,"um2_direction":0,"aggregate_source_method":"Arun Parser v0.3","um2_concept_description":"Arun Parser v0.3","importance":0,"contributesK":0},{"aggregate_id":{},"um2_activity_id":{},"aggregate_content_name":"tmp1","um2_concept_id":{},"aggregate_component_name":"Numeric-or-string-or-collection-assignment","aggregate_context_name":"Numeric-or-string-or-collection-assignment","aggregate_domain":"py","um2_aggregate_weight":1,"aggregate_active":1,"um2_direction":0,"aggregate_source_method":"Arun Parser v0.3","um2_concept_description":"Arun Parser v0.3","importance":0,"contributesK":0},{"aggregate_id":{},"um2_activity_id":{},"aggregate_content_name":"tmp1","um2_concept_id":{},"aggregate_component_name":"Int","aggregate_context_name":"Int","aggregate_domain":"py","um2_aggregate_weight":1,"aggregate_active":1,"um2_direction":0,"aggregate_source_method":"Arun Parser v0.3","um2_concept_description":"Arun Parser v0.3","importance":0,"contributesK":0},{"aggregate_id":{},"um2_activity_id":{},"aggregate_content_name":"tmp1","um2_concept_id":{},"aggregate_component_name":"Alias","aggregate_context_name":"Alias","aggregate_domain":"py","um2_aggregate_weight":1,"aggregate_active":1,"um2_direction":0,"aggregate_source_method":"Arun Parser v0.3","um2_concept_description":"Arun Parser v0.3","importance":0,"contributesK":0}]
[{"aggregate_id":{},"um2_activity_id":{},"aggregate_content_name":"tmp1","um2_concept_id":{},"aggregate_component_name":"Numeric-or-string-or-collection-assignment","aggregate_context_name":"Numeric-or-string-or-collection-assignment","aggregate_domain":"py","um2_aggregate_weight":1,"aggregate_active":1,"um2_direction":0,"aggregate_source_method":"Arun Parser v0.3","um2_concept_description":"Arun Parser v0.3","importance":0,"contributesK":0}]
[{"aggregate_id":{},"um2_activity_id":{},"aggregate_content_name":"tmp1","um2_concept_id":{},"aggregate_component_name":"Numeric-or-string-or-collection-assignment","aggregate_context_name":"Numeric-or-string-or-collection-assignment","aggregate_domain":"py","um2_aggregate_weight":1,"aggregate_active":1,"um2_direction":0,"aggregate_source_method":"Arun Parser v0.3","um2_concept_description":"Arun Parser v0.3","importance":0,"contributesK":0},{"aggregate_id":{},"um2_activity_id":{},"aggregate_content_name":"tmp1","um2_concept_id":{},"aggregate_component_name":"Slice","aggregate_context_name":"Slice","aggregate_domain":"py","um2_aggregate_weight":1,"aggregate_active":1,"um2_direction":0,"aggregate_source_method":"Arun Parser v0.3","um2_concept_description":"Arun Parser v0.3","importance":0,"contributesK":0},{"aggregate_id":{},"um2_activity_id":{},"aggregate_content_name":"tmp1","um2_concept_id":{},"aggregate_component_name":"Int","aggregate_context_name":"Int","aggregate_domain":"py","um2_aggregate_weight":1,"aggregate_active":1,"um2_direction":0,"aggregate_source_method":"Arun Parser v0.3","um2_concept_description":"Arun Parser v0.3","importance":0,"contributesK":0}]

ok
```
200 changes: 200 additions & 0 deletions concepts_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
# *****************************************************************************
# Python AST parser for the ADL project
# Version 0.1.0, Teemu Sirkia

# Reads a given Python program and creates a JSON object
# describing line-by-line which language elements exist
# in the code.
#
# For the list of the available nodes, see:
# https://docs.python.org/3/library/ast.html#abstract-grammar
# *****************************************************************************

import os
from os import path
from tqdm import tqdm
import pandas as pd
from utils import *
from datetime import datetime
from version import __version__


def main(local=True,filename='./py-files/chap2/sec_2_7.py',mode='simple'):
    """Parse a Python file into an AST and collect the language concepts found.

    Parameters
    ----------
    local : bool
        True  -> return the merged concept result to the in-process caller.
        False -> print the result as JSON to stdout (server/CLI mode) and
                 exit with status 1 on failure.
    filename : str
        Path of the Python source file to analyse.
    mode : str
        Traversal mode: 'simple', 'complex', 'hierarchical', 'concepts'
        or 'ast_walk'.  NOTE(review): read_input_files() also returns a
        mode, so the passed value may be overridden — confirm in utils.

    Returns
    -------
    merge_lines_nodes(nodes) when local is True (helper from utils; the
    __main__ block below unpacks it as (codelines, response) — its exact
    shape is not visible in this file).  Returns None when local is False.
    """
    #TODO 5 -- create table of concepts -- matching tags -- present example integration from concept to textbook
    #TODO 1 -- expressions with parentheses -- simple / complex expression -- section 2.7
    #TODO 2 -- operator overload -- section 2.9 -- instead of Add -- Consider StrAdd
    #TODO 3 -- input function -- section 2.10
    #TODO 4 -- comments

    # read_input_files comes from the star import of utils; it supplies the
    # raw source text, its lines, and the (possibly adjusted) parsing mode.
    code,codelines,mode = read_input_files(local,filename,mode)

    # Per-line node-name sets, keyed by line number, filled by the traversers.
    nodes = {'lines' : {}}

    try:

        tree = ast.parse(code)
        # tokens = lexer_tokens(code)

        # Root of the tree built in 'hierarchical' mode.
        startNode = {'name': 'root', 'children': []}

        # Traverse all the nodes in the AST

        if mode == 'complex':
            for node in ast.iter_child_nodes(tree):
                complexTraverse(node, 0, nodes)
        elif mode == 'hierarchical':
            for node in ast.iter_child_nodes(tree):
                hierarchicalTraverse(node, 0, startNode)
        elif mode in ('simple', 'concepts','ast_walk'):
            for node in ast.iter_child_nodes(tree):
                simpleTraverse(node, 0, nodes)
        else:
            print('Parsing failed!\n\nError occurred: Unknown parsing mode', file=sys.stderr)
            sys.exit(1)

        # Convert sets to lists before JSON transformation
        if mode == 'simple' or mode == 'complex':
            for line in nodes['lines']:
                nodes['lines'][line] = list(nodes['lines'][line])
        elif mode == 'hierarchical':
            nodes = startNode
        elif mode == 'concepts':
            # Flatten the per-line sets into one de-duplicated concept list.
            concepts = set()
            for line in nodes['lines']:
                for concept in list(nodes['lines'][line]):
                    concepts.add(concept)
            nodes = list(concepts)
        elif mode == 'ast_walk':
            # Debug mode: dump every node's attributes.  NOTE(review): the
            # 'concepts' set built here is never used.
            concepts = set()

            for node in ast.walk(tree):
                print(node.__dict__)

        if not(local):
            print(json.dumps(nodes))

        if local:
            return merge_lines_nodes(nodes)#.union(tokens)

    except Exception as e:
        # Best-effort reporting; in server/CLI mode a parse failure is fatal.
        print('Parsing failed!\n\nError occurred: ' + str(e), file=sys.stderr)
        # print(re.split(r'[<,\s,>,\']',str(type(e)))[3], f'line no {e.args[1][1]}')
        if not(local): sys.exit(1)

def post_process_parser(response,fname='tmp1.py',activity_id = pd.NA):
    """Format parsed concepts as aggregate.kc_content_component rows.

    Parameters
    ----------
    response : iterable of str
        Concept names produced by the parser (e.g. the result of main()).
    fname : str
        Content file name; its stem becomes aggregate_content_name.
    activity_id
        Activity id provided by the user (um2 activity id); defaults to pd.NA.

    Returns
    -------
    list[dict]
        One row per concept, ready for JSON serialisation / DB insert.
    """
    # Mapping of concept name -> um2 concept id (um2.ent_concept joined with
    # um2.rel_concept_activity, python only).  Use a context manager so the
    # handle is closed — the original json.load(open(...)) leaked it.
    with open('./static/um2_python_concept_ids.json', 'r') as fh:
        concept_ids = json.load(fh)

    content_name = path.splitext(fname)[0]
    section_concepts = []
    for concept in response:
        section_concepts.append(
            {
                # Leave blank for insert queries, otherwise the id column in
                # aggregate.kc_content_component / um2.ent_activity.
                'aggregate_id': pd.NA,
                'um2_activity_id': activity_id,
                'aggregate_content_name': content_name,
                # dict.get avoids the "x in d ... d[x]" double lookup.
                'um2_concept_id': concept_ids.get(concept, pd.NA),
                'aggregate_component_name': concept,
                'aggregate_context_name': concept,
                'aggregate_domain': 'py',
                'um2_aggregate_weight': 1,
                'aggregate_active': 1,
                'um2_direction': 0,
                'aggregate_source_method': f'Arun Parser v{__version__}',
                'um2_concept_description': f'Arun Parser v{__version__}',
                'importance': 0,
                'contributesK': 0
                # 'date_added': datetime.today().strftime('%Y-%m-%d %H:%M:%S')
            }
        )
    return section_concepts


if __name__ == '__main__':
    local = True
    ## TODO handle case for non local (such as server api setup)
    if local:
        # Directories of example programs to scan; SMART_CONTENT_LIST marks
        # the ones exported in the "smart content" CSV layout.
        PYTHON_TEXTBOOK_EXAMPLE_LIST = ['parsons','quizpet']#['pcex','py-files','pcex','pcex-python-code','quizpet','parsons']
        SMART_CONTENT_LIST = ['pcex','quizpet','pcex-python-code','parsons']

        for PYTHON_TEXTBOOK_EXAMPLES in PYTHON_TEXTBOOK_EXAMPLE_LIST:
            print("processing",PYTHON_TEXTBOOK_EXAMPLES)
            # Column-name -> values mapping, later turned into a DataFrame.
            section_concepts = {}
            if PYTHON_TEXTBOOK_EXAMPLES in SMART_CONTENT_LIST:
                section_concepts ['content_name']= []
            if PYTHON_TEXTBOOK_EXAMPLES == 'py-files':
                section_concepts['content_id'] = []
                section_concepts ['section_id']= []

            section_concepts['concept'] = []

            # Parse every .py file under the example directory.
            for root,curr_dir,files in os.walk(f'./{PYTHON_TEXTBOOK_EXAMPLES}/'):
                for fname in tqdm(files):
                    if path.splitext(fname)[1] == '.py':
                        try:
                            codelines,response = main(local,path.join(root,fname))
                            # print(response)
                            if PYTHON_TEXTBOOK_EXAMPLES == 'py-files':
                                # content ids are sequential, starting at 143
                                # (presumably the next free id in the target
                                # table — TODO confirm).
                                section_concepts['content_id'].append(section_concepts['content_id'][-1]+1 if len(section_concepts['content_id']) >0 else 143)
                                section_concepts['section_id'].append(path.splitext(fname)[0])
                            if PYTHON_TEXTBOOK_EXAMPLES in SMART_CONTENT_LIST:
                                section_concepts['content_name'].append(path.splitext(fname)[0])
                            # All concepts for one file are joined into a
                            # single '_'-separated string.
                            section_concepts['concept'].append('_'.join(list(response)))
                            # print(section_concepts)

                        except Exception:
                            # Best effort: report the failing file and move on.
                            print(root,fname)
                            # print(response['lines'])
                            # print(codelines)

            smart_concepts_sections = pd.DataFrame.from_dict(section_concepts)#.sort_values(by='section_id')
            # smart_concepts_sections.loc[:,'date_updated'] = pd.to_datetime('today')
            # smart_concepts_sections = smart_concepts_sections.explode('concept')

            # if PYTHON_TEXTBOOK_EXAMPLES == 'py-files':
            #     db = pd.read_csv('./readingmirror-data-files/smart_learning_content_section.csv')

            # if PYTHON_TEXTBOOK_EXAMPLES in SMART_CONTENT_LIST:
            #     db = pd.read_csv(f'./readingmirror-data-files/smart_learning_content_concepts.csv')

            # Timestamp used to make the output CSV names unique per run.
            timestamp = pd.to_datetime('today').strftime('%Y%m%d%H%M%S')

            if PYTHON_TEXTBOOK_EXAMPLES == 'py-files':
                smart_concepts_sections.loc[:,'resource_id'] = 'pfe'
                smart_concepts_sections.loc[:,'is_active'] = 1
                smart_concepts_sections.loc[:,'date_added'] = '2024-06-23 19:40:02'
                smart_concepts_sections.to_csv('./smart_learning_content_section.csv',index=False)

            if PYTHON_TEXTBOOK_EXAMPLES in SMART_CONTENT_LIST:
                # Constant metadata columns required by the smart-content schema.
                smart_concepts_sections.loc[:,'domain']='py'
                smart_concepts_sections.loc[:,'weight']=1
                smart_concepts_sections.loc[:,'active']=1
                smart_concepts_sections.loc[:,'source_method']='parser'
                smart_concepts_sections.loc[:,'importance']=1
                smart_concepts_sections.loc[:,'contributesK']=1
                smart_concepts_sections.loc[:,'component_name'] = smart_concepts_sections.loc[:,'concept']
                smart_concepts_sections.loc[:,'context_name'] = smart_concepts_sections.loc[:,'concept']
                # Drop the raw 'concept' column from the export.
                smart_concepts_sections[[x for x in smart_concepts_sections.columns if not(x == 'concept')]].to_csv(f'./smart_learning_content_concepts_{PYTHON_TEXTBOOK_EXAMPLES}_{timestamp}.csv',index=False)



# type: ignore


## TODO something from outcomes nothing beyond
## TODO why is it being allocated this way -- indexing mistake ?
## TODO all the worksexamples -- get the py





### Parser gives all the concepts -- new section / new concepts
### update the database for chapter sections
### update the smartcontent database
### filter in a separate -- no from future (before or present)
### filter smart content database -- no from future (before or present)




32 changes: 32 additions & 0 deletions file_creator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Splits concatenated example-code dumps into individual .py files:
#   ./parsons/parsons_codes.txt  (plain text, one 'ps_...' header per snippet)
#   ./quizpet/quizpet_codes.csv  (CSV with 'code' and 'rdfID' columns)

import pandas as pd
from tqdm import tqdm

# First pass: record the 1-based line numbers of the 'ps_' header lines.
line_numbers = []
with open('./parsons/parsons_codes.txt','r') as f:
    counter = 0
    for line in f.readlines():
        counter += 1
        if line.startswith('ps_'):
            line_numbers.append(counter)

# Second pass: slice out each snippet and write it to its own file.
with open('./parsons/parsons_codes.txt','r') as f:
    lines = f.readlines()
    for ind in tqdm(range(0,len(line_numbers))):
        line_number = line_numbers[ind]-1  # 0-based index of the header line
        code_filename = lines[line_number]
        # assumes one separator line after the header and two trailer lines
        # before the next header — TODO confirm against the dump format
        code = lines[line_number+2:line_numbers[ind+1]-2] if ind+1 < len(line_numbers) else lines[line_number+2:]

        # NOTE(review): this rebinds 'f' (the outer dump file); safe here
        # because 'lines' was already read, but fragile.
        with open(f'./parsons/{code_filename.strip()}.py','w+') as f:
            for line in code:
                f.write(line)



quizpet_codes = pd.read_csv('./quizpet/quizpet_codes.csv')


# NOTE(review): iterrows() yields (index, row) pairs, so 'row' is actually
# the index and 'ind' is the row Series; the names are swapped but the code
# works because only ind['code'] / ind['rdfID'] are accessed.
for row, ind in tqdm(quizpet_codes.iterrows()):
    code = ind['code']
    code_filename = ind['rdfID']
    with open(f'./quizpet/{code_filename}.py','w+') as f:
        f.write(code)
62 changes: 62 additions & 0 deletions main_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from concepts_parser import main, post_process_parser
import os
from version import __version__

# Single application instance; the route decorators below register on it.
app = FastAPI()

class CodeString(BaseModel):
    """
    Request body for /extract_concepts.

    Carries the Python source to analyse plus the identifiers used to
    label the resulting concept rows.
    This can be extended with validation if needed.
    """
    aggregate_id :int = 11111 ## Leave this blank if for insert query, otherwise provide id column in aggregate_kc_content_component
    um2_activity_id:int = 11111 ## Leave this blank if for insert query, otherwise provide id column in um2_ent_activity,
    aggregate_content_name:str = 'tmp' ### if use content_name as stored in um2_ent_activity,
    code_str:str  ## the raw Python source code to parse (required)
    # um2_concept_id : int ## this will be generated from backend
    # aggregate_component_name:str
    # aggregate_context_name: str
    # aggregate_domain:str = 'py' ## defaults to python
    # um2_aggregate_weight:int = 1
    # aggregate_active:int = 1
    # um2_direction: int = 0
    # aggregate_source_method:str = f'Arun Parser v{__version__}'
    # um2_concept_description:str = f'Arun Parser v{__version__}'
    # importance:int = 0
    # contributesK:int = 0

@app.get("/test_api")
async def test_api():
    """Liveness probe: confirm the API process is up and responding."""
    payload = dict(message="API is working!")
    return payload


@app.post("/extract_concepts")
async def extract_concepts(code_json: CodeString):
    """Parse the submitted Python source and return its concept rows.

    The parser works on files, so the code is written to a temporary file
    under ./py-files, parsed with main(), and the rows produced by
    post_process_parser() are returned.

    Raises
    ------
    HTTPException
        400 when no body was provided; 500 on any parse/processing failure.
    """
    print(code_json)

    if code_json is None:
        raise HTTPException(status_code=400, detail="Code string cannot be empty")

    # TODO: the fixed tmp1.py name races between concurrent requests;
    # consider tempfile.mkstemp(dir=...) per request.
    tmp_dir = './py-files'
    tmp_path = './py-files/tmp1.py'
    # Only remove the directory afterwards if this request created it —
    # the original unconditional os.rmdir raised (-> spurious 500) whenever
    # the directory pre-existed with other files in it.
    created_dir = not os.path.exists(tmp_dir)
    try:
        code_str = code_json.code_str.strip()

        if created_dir:
            os.mkdir(tmp_dir)

        # Write the code to a temporary file for the file-based parser.
        with open(tmp_path, 'w+') as f:
            f.write(code_str)

        # Extract concepts from the code.
        response = main(filename='py-files/tmp1.py')

        response_df = post_process_parser(response, code_json.aggregate_content_name, code_json.um2_activity_id)
        return response_df

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
    finally:
        # Clean up even when parsing fails — the original only removed the
        # temp file on the success path and leaked it on error.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        if created_dir and os.path.exists(tmp_dir):
            os.rmdir(tmp_dir)
Loading