Skip to content

Commit

Permalink
Output markdown (#29)
Browse files Browse the repository at this point in the history
* feat: added function summary

* feat: improvement in models

* feat: improved models

* feat: added dspy modules

* latest dspy pipelines

* fix: commit latest changes

* fix: ready for release

* feat: added ability to give out markdown summary

---------

Co-authored-by: jghiya <[email protected]>
  • Loading branch information
JayGhiya and jghiya authored Jul 1, 2024
1 parent 0c4cd39 commit 3b4c322
Show file tree
Hide file tree
Showing 22 changed files with 250 additions and 687 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,6 @@ unoplat-code-confluence/utility/__pycache__
unoplat-code-confluence/data_models/dspy/__pycache__
unoplat-code-confluence/__pycache__
unoplat-code-confluence/summary_parser/__pycache__
unoplat-code-confluence/dependencies/analysers
unoplat-code-confluence/.env

42 changes: 27 additions & 15 deletions unoplat-code-confluence/__main__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
import argparse
import json
from os import get_terminal_size
import os
from loguru import logger
import datetime
from codebaseparser.ArchGuardHandler import ArchGuardHandler
import re
from data_models.chapi_unoplat_codebase import UnoplatCodebase
from data_models.dspy.dspy_unoplat_codebase_summary import DspyUnoplatCodebaseSummary
from downloader.downloader import Downloader
from dspy_class_summary import CodeConfluenceClassModule
from dspy_codebase_summary import CodeConfluenceCodebaseModule
Expand All @@ -15,7 +15,7 @@
from loader import iload_json, iparse_json
from loader.json_loader import JsonLoader
from loader.parse_json import JsonParser
from nodeparser.nodesummariser import NodeSummariser
from nodeparser.markdownsummariser import MarkdownSummariser
from nodeparser.isummariser import ISummariser
from settings.appsettings import AppSettings
from summary_parser.codebase_summary import CodebaseSummaryParser
Expand All @@ -31,6 +31,7 @@ def get_codebase_metadata(json_configuration_data,settings: AppSettings,iload_js
local_workspace_path = json_configuration_data["local_workspace_path"]
programming_language = json_configuration_data["programming_language"]
output_path_field = json_configuration_data["output_path"]
output_file_name = json_configuration_data["output_file_name"]
codebase_name_field = json_configuration_data["codebase_name"]
github_token = settings.github_token
arcguard_cli_repo = json_configuration_data["repo"]["download_url"]
Expand All @@ -44,6 +45,7 @@ def get_codebase_metadata(json_configuration_data,settings: AppSettings,iload_js
# move this when expanding to new languages
programming_language,
output_path_field,
output_file_name,
codebase_name_field,
github_token,
arcguard_cli_repo,
Expand All @@ -56,9 +58,9 @@ def get_codebase_metadata(json_configuration_data,settings: AppSettings,iload_js
def download_and_continue(settings,manager):
try:
jar_path = Downloader.download_latest_jar(settings.download_url, settings.download_directory, settings.github_token)
print(f"Download completed: {jar_path}")
logger.info(f"Download completed: {jar_path}")
except Exception as e:
print(f"Error during download: {e}")
logger.error(f"Error during download: {e}")
finally:
manager.stop()
return jar_path
Expand All @@ -83,7 +85,8 @@ def ensure_jar_downloaded(github_token,arcguard_cli_repo,local_download_director

return jar_path

def start_parsing(local_workspace_path, settings, programming_language, output_path, codebase_name, github_token, arcguard_cli_repo, local_download_directory, iload_json, iparse_json, isummariser):

def start_parsing(local_workspace_path, settings, programming_language, output_path,output_file_name, codebase_name, github_token, arcguard_cli_repo, local_download_directory, iload_json, iparse_json, isummariser):

# Log the start of the parsing process
logger.info("Starting parsing process...")
Expand Down Expand Up @@ -114,27 +117,38 @@ def start_parsing(local_workspace_path, settings, programming_language, output_p

output_filename = f"{codebase_name}_{current_timestamp}.md"

unoplat_codebase : UnoplatCodebase = iparse_json.parse_json_to_nodes(chapi_metadata, isummariser)
unoplat_codebase : UnoplatCodebase = iparse_json.parse_json_to_nodes(chapi_metadata)

dspy_function_pipeline_summary : CodeConfluenceFunctionModule = CodeConfluenceFunctionModule()

dspy_class_pipeline_summary : CodeConfluenceClassModule = CodeConfluenceClassModule()


dspy_package_pipeline_summary : CodeConfluencePackageModule = CodeConfluencePackageModule()

dspy_codebase_pipeline_summary: CodeConfluenceCodebaseModule = CodeConfluenceCodebaseModule()

dspy_function_pipeline_summary : CodeConfluenceFunctionModule = CodeConfluenceFunctionModule()

dspy_class_pipeline_summary : CodeConfluenceClassModule = CodeConfluenceClassModule()


codebase_summary = CodebaseSummaryParser(unoplat_codebase,dspy_function_pipeline_summary, dspy_class_pipeline_summary,dspy_package_pipeline_summary,dspy_codebase_pipeline_summary,settings)

codebase_summary.parse_codebase()


# with open(os.path.join(output_path, output_filename), 'a+') as md_file:
# for node in iparse_json.parse_json_to_nodes(chapi_metadata, isummariser):
# if node.type == "CLASS":
# md_file.write(f"{node.summary}\n\n")
# with open('codebase_summary.json', 'w') as file:
# json.dump(codebase_metadata, file)
codebase_summary = CodebaseSummaryParser(unoplat_codebase,dspy_function_pipeline_summary, dspy_class_pipeline_summary,dspy_package_pipeline_summary,dspy_codebase_pipeline_summary,settings)

unoplat_codebase_summary: DspyUnoplatCodebaseSummary = codebase_summary.parse_codebase()

# render the dspy unoplat codebase summary as markdown

markdown_output = isummariser.summarise_to_markdown(unoplat_codebase_summary)
# write the markdown output to a file
with open(os.path.join(output_path, output_filename), 'w') as md_file:
md_file.write(markdown_output)

logger.info("Parsing process completed.")


Expand All @@ -147,11 +161,9 @@ def start_parsing(local_workspace_path, settings, programming_language, output_p

iload_json = JsonLoader()
iparse_json = JsonParser()
isummariser = NodeSummariser()
isummariser = MarkdownSummariser()
#loading the config
json_configuration_data = iload_json.load_json_from_file(args.config)
print(json_configuration_data)

#loading and setting the logging config
logging_config = iload_json.load_json_from_file("loguru.json")
logger.configure(handlers=logging_config["handlers"])
Expand Down
5 changes: 0 additions & 5 deletions unoplat-code-confluence/codeagent/__init__.py

This file was deleted.

22 changes: 0 additions & 22 deletions unoplat-code-confluence/codeagent/current_item.py

This file was deleted.

144 changes: 0 additions & 144 deletions unoplat-code-confluence/codeagent/unoplat_agent.py

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,8 @@

class DspyUnoplatCodebaseSummary(BaseModel):
codebase_summary: Optional[str] = Field(default=None, description="A summary of the codebase")

codebase_objective: Optional[str] = Field(default=None, description="The objective of the codebase")

codebase_name: Optional[str] = Field( default=None,description="The file id of the codebase summary")
codebase_package: Optional[DspyUnoplatPackageSummary] = Field(default=None,description="A summary of the codebase package")
5 changes: 3 additions & 2 deletions unoplat-code-confluence/dspy_class_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from data_models.dspy.dspy_unoplat_node_summary import DspyUnoplatNodeSummary


#TODO: optimise using gpt4 judge and miprov2

class CodeConfluenceClassSummarySignature(dspy.Signature):
"""This signature takes in existing summary of a class and function summary of a class one at a time and returns enhanced final summary."""
class_existing_summary: str = dspy.InputField(default="Summary:",desc="This will contain existing class summary")
Expand Down Expand Up @@ -34,13 +36,12 @@ def forward(self, class_metadata: DspyUnoplatNodeSubset, function_objective_summ
signature_class_summary = self.generate_class_summary(class_existing_summary=class_summary, function_summary=function_objective.function_summary.objective, class_metadata=str(class_metadata.model_dump_json()))
class_summary = signature_class_summary.final_class_summary

print("class summary",class_summary)

if len(function_objective_summary) > 0:
class_objective_signature = self.generate_class_objective(final_class_summary = class_summary)
else:
class_objective_signature = self.generate_class_objective(final_class_summary = class_metadata.content)
print("class objective",class_objective_signature.class_objective)

dspy_class_summary = DspyUnoplatNodeSummary(NodeName=class_metadata.node_name,NodeObjective=class_objective_signature.class_objective, NodeSummary=class_summary,FunctionsSummary=function_objective_summary)

return dspy.Prediction(answer=dspy_class_summary)
Expand Down
1 change: 1 addition & 0 deletions unoplat-code-confluence/dspy_codebase_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@



#TODO: optimise using gpt4 judge and miprov2
class CodeConfluenceCodebaseSignature(dspy.Signature):
"""This signature takes in existing summary of a codebase and package summary of a package one at a time and returns final_codebase_summary as enhanced final summary of codebase"""
codebase_existing_summary: str = dspy.InputField(alias="codebase_existing_summary",default="codebase existing summary:",desc="This will contain existing codebase summary")
Expand Down
5 changes: 2 additions & 3 deletions unoplat-code-confluence/dspy_function_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from data_models.dspy.dspy_unoplat_fs_node_subset import DspyUnoplatNodeSubset


#TODO: optimise using gpt4 judge and miprov2

class CodeConfluenceFunctionSummarySignature(dspy.Signature):
"""This signature takes in class metadata and function metadata with function content and returns objective and descriptive function summaries."""
Expand All @@ -29,9 +30,7 @@ def forward(self, function_metadata: DspyUnoplatFunctionSubset, class_metadata:
class_subset = str(class_metadata.model_dump_json())
function_subset = str(function_metadata.model_dump_json())
code_confluence_function_summary = self.generate_function_summary( dspy_class_subset = class_subset, dspy_function_subset= function_subset)
print("function implementation:",code_confluence_function_summary.function_implementation)
code_confluence_function_objective = self.generate_function_objective(function_implementation=code_confluence_function_summary.function_implementation)
print("function objective:",code_confluence_function_objective.function_objective)
code_confluence_function_objective = self.generate_function_objective(function_implementation=code_confluence_function_summary.function_implementation)
dspy_function_summary = DspyFunctionSummary(Objective=code_confluence_function_objective.function_objective, ImplementationSummary=code_confluence_function_summary.function_implementation)
return dspy.Prediction(answer=dspy_function_summary)

Expand Down
1 change: 1 addition & 0 deletions unoplat-code-confluence/dspy_package_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@



#TODO: optimise using gpt4 judge and miprov2
class CodeConfluencePackageSignature(dspy.Signature):
"""This signature takes in existing summary of a class and function summary of a class one at a time and returns final enhanced summary"""
package_existing_summary: str = dspy.InputField(default="package existing summary:",desc="This will contain existing package summary")
Expand Down
Loading

0 comments on commit 3b4c322

Please sign in to comment.