Skip to content

Commit

Permalink
Merge pull request #11 from IBM/10-extract-api-methods
Browse files Browse the repository at this point in the history
10 extract api methods
  • Loading branch information
PiotrAniola82 authored Oct 30, 2024
2 parents 7273643 + ef2556d commit 8773b79
Show file tree
Hide file tree
Showing 3 changed files with 184 additions and 72 deletions.
111 changes: 70 additions & 41 deletions javacore_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import tempfile
import traceback
import zipfile
from pathlib import Path

import py7zr

Expand All @@ -22,25 +23,19 @@

LOGGING_FORMAT = '%(asctime)s [%(levelname)s][%(filename)s:%(lineno)s] %(message)s'

SUPPORTED_ARCHIVES_FORMATS = {"zip", "gz", "tgz", "bz2", "lzma", "7z"}

def create_output_files_structure(output_dir):
    """
    Prepare the report output directory and refresh its ``data`` subfolder.

    The output directory is created when it is missing, together with any
    missing parent directories. An already existing ``<output_dir>/data``
    folder is removed and then repopulated with a copy of the ``data``
    directory, which is resolved relative to the current working directory.

    Parameters:
    - output_dir (str): Directory in which the report files are generated.
    Returns:
    - None
    """
    # makedirs(exist_ok=True) replaces the isdir()/mkdir() pair: it also
    # creates missing parents and does not fail if the directory appears
    # between the check and the creation.
    os.makedirs(output_dir, exist_ok=True)
    data_dir = output_dir + '/data'
    if os.path.isdir(data_dir):
        # Best-effort removal of stale report assets; copytree below tolerates
        # leftovers because it is called with dirs_exist_ok=True.
        shutil.rmtree(data_dir, ignore_errors=True)
    logging.info("Data dir: " + data_dir)
    shutil.copytree("data", data_dir, dirs_exist_ok=True)


def create_file_logging(output_param):
logging_file = output_param + "/wait2-debug.log"

def create_file_logging(logging_file_dir):
    """
    Attach a DEBUG-level file handler to the root logger.

    The handler writes to ``wait2-debug.log`` inside ``logging_file_dir``,
    truncating any previous content of that file, and formats records with
    LOGGING_FORMAT.

    Parameters:
    - logging_file_dir (str): Directory that will contain the debug log file.
    Returns:
    - None
    """
    debug_log_path = logging_file_dir + "/wait2-debug.log"
    # The logging folder might not exist yet, so create it (with parents) first.
    Path(logging_file_dir).mkdir(parents=True, exist_ok=True)
    debug_handler = logging.FileHandler(debug_log_path, mode='w')
    debug_handler.setFormatter(logging.Formatter(LOGGING_FORMAT))
    debug_handler.setLevel(logging.DEBUG)
    root_logger = logging.getLogger()
    root_logger.addHandler(debug_handler)


def extract_archive(input_archive_filename, output_path):
"""
Expand Down Expand Up @@ -102,44 +97,78 @@ def main():
output_param = args.output_param
files_separator = args.separator

# Location when we store extracted archive or copied javacore files
javacores_temp_dir = tempfile.TemporaryDirectory()
javacores_temp_dir_name = javacores_temp_dir.name

try:
logging.info("Input parameter: " + input_param)
logging.info("Report directory: " + output_param)
logging.info("Input parameter: " + input_param)
logging.info("Report directory: " + output_param)

create_output_files_structure(output_param)
# Needs to be created once output file structure is ready.
create_file_logging(output_param)

# Needs to be created once output file structure is ready.
create_file_logging(output_param)
# Check whether as input we got list of files or single file
# Semicolon is separation mark for list of input files
if files_separator in input_param or fnmatch.fnmatch(input_param, '*javacore*.txt'):
# Process list of the files (copy all or them to output dir
files = input_param.split(files_separator)
else:
files = [input_param]

# Check whether as input we got list of files or single file
# Colon is separation mark for list of input files
if files_separator in input_param or fnmatch.fnmatch(input_param, '*javacore*.txt'):
# Process list of the files (copy all or them to output dir
files = input_param.split(files_separator)
for file in files:
file = file.strip()
shutil.copy2(file, javacores_temp_dir_name)
path = javacores_temp_dir_name
elif os.path.isdir(input_param):
path = input_param # We do not want to copy the files to temp dir is an input is a dir
elif os.path.isfile(input_param):
path = extract_archive(input_param, javacores_temp_dir_name) # Extract archive to temp dir
else:
logging.error(
"The specified parameter " + input_param + " is not a file or a directory. Cannot process it. Exiting")
exit(13)
JavacoreSet.process_javacores_dir(path, output_param)
try:
process_javacores_and_generate_report_data(files, output_param)
except Exception as ex:
traceback.print_exc(file=sys.stdout)
logging.error("Processing was not successful. Correct the problem and try again. Exiting with error 13",
exc_info=True)
exit(13)
finally:
javacores_temp_dir.cleanup()


# Assisted by WCA@IBM
# Latest GenAI contribution: ibm/granite-8b-code-instruct
def generate_javecore_set_data(files):
    """
    Generate JavacoreSet data from given files.

    Every entry is gathered into a single temporary directory: directories are
    copied recursively, files whose extension is listed in
    SUPPORTED_ARCHIVES_FORMATS are extracted, and all other files are copied
    as they are. That directory is then passed to
    JavacoreSet.process_javacores and removed once processing has finished.

    Parameters:
    - files (list): List of file paths to process. Can be directories or individual files.
      Paths are used exactly as given (no whitespace stripping).
    Returns:
    - JavacoreSet: Generated JavacoreSet object containing the processed data.
    """
    # The context manager removes the temporary directory deterministically,
    # also when copying, extraction or processing raises.
    # NOTE(review): the original authors observed that the temp directory
    # sometimes contained content from a previous run - confirm whether that
    # still happens now that cleanup is explicit.
    with tempfile.TemporaryDirectory() as javacores_temp_dir_name:
        for file in files:
            if os.path.isdir(file):
                shutil.copytree(file, javacores_temp_dir_name, dirs_exist_ok=True)
                continue
            # splitext() keeps the leading "." of the extension, drop it.
            extension = os.path.splitext(file)[1][1:]
            if extension.lower() in SUPPORTED_ARCHIVES_FORMATS:
                extract_archive(file, javacores_temp_dir_name)  # Extract archive to temp dir
            else:
                shutil.copy2(file, javacores_temp_dir_name)
        return JavacoreSet.process_javacores(javacores_temp_dir_name)



# Assisted by WCA@IBM
# Latest GenAI contribution: ibm/granite-8b-code-instruct
def process_javacores_and_generate_report_data(input_files, output_dir):
    """
    Build the javacore data set from the inputs and write its report files.

    Parameters:
        input_files (list): Paths forwarded unchanged to generate_javecore_set_data.
        output_dir (str): Directory handed to generate_report_files of the resulting set.
    Returns:
        None
    """
    generate_javecore_set_data(input_files).generate_report_files(output_dir)



if __name__ == "__main__":
Expand Down
116 changes: 88 additions & 28 deletions javacore_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def __init__(self, path):
self.verbose_gc_files = []
self.threads = SnapshotCollectionCollection(Thread)
self.stacks = SnapshotCollectionCollection(CodeSnapshotCollection)
self.report_xml_file = None

self.doc = None

Expand All @@ -64,42 +65,74 @@ def __init__(self, path):
self.tips = []
self.gc_parser = VerboseGcParser()

def process_javacores_dir(input_path, output_path):
# Assisted by WCA@IBM
# Latest GenAI contribution: ibm/granite-8b-code-instruct
def process_javacores(input_path):
    """
    Create a JavacoreSet from a directory and run the analysis steps on it.

    Args:
        input_path (str): The path to the directory containing the Javacore data.
    Returns:
        JavacoreSet: The set created by JavacoreSet.create after the steps below have run.
    """
    javacore_set = JavacoreSet.create(input_path)
    javacore_set.print_java_settings()
    javacore_set.populate_snapshot_collections()
    javacore_set.sort_snapshots()
    # NOTE: find_top_blockers() is currently disabled and is not called here.
    javacore_set.print_blockers()
    javacore_set.print_thread_states()
    javacore_set.generate_tips()
    return javacore_set


# Assisted by WCA@IBM
# Latest GenAI contribution: ibm/granite-8b-code-instruct
def generate_report_files(self, output_dir):
"""
Generate report files in HTML format.
Parameters:
- output_dir (str): The directory where the generated report files will be saved.
Returns:
- None
"""
temp_dir = tempfile.TemporaryDirectory()
try:
jset = JavacoreSet.create(input_path)
jset.print_java_settings()
jset.populate_snapshot_collections()
jset.sort_snapshots()
# jset.find_top_blockers()
jset.print_blockers()
jset.print_thread_states()
jset.generate_tips()
output_dir = output_path
temp_dir_name = temp_dir.name
logging.info("Created temp dir: " + temp_dir_name)
jset.create_report_xml(temp_dir_name + "/report.xml")
jset.generate_htmls_for_threads(output_dir, temp_dir_name)
jset.generate_htmls_for_javacores(output_dir, temp_dir_name)
jset.create_index_html(temp_dir_name, output_dir)
finally:
temp_dir.cleanup()
temp_dir_name = temp_dir.name
logging.info("Created temp dir: " + temp_dir_name)
self.__create_output_files_structure(output_dir)
self.__create_report_xml(temp_dir_name + "/report.xml")
self.__generate_htmls_for_threads(output_dir, temp_dir_name)
self.__generate_htmls_for_javacores(output_dir, temp_dir_name)
self.__create_index_html(temp_dir_name, output_dir)

def __create_output_files_structure(self, output_dir):
    """
    Prepare the report output directory and refresh its ``data`` subfolder.

    The output directory is created when it is missing, together with any
    missing parent directories. An already existing ``<output_dir>/data``
    folder is removed and then repopulated with a copy of the ``data``
    directory, which is resolved relative to the current working directory.

    Parameters:
    - output_dir (str): Directory in which the report files are generated.
    Returns:
    - None
    """
    # makedirs(exist_ok=True) replaces the isdir()/mkdir() pair: it also
    # creates missing parents and does not fail if the directory appears
    # between the check and the creation.
    os.makedirs(output_dir, exist_ok=True)
    data_dir = output_dir + '/data'
    if os.path.isdir(data_dir):
        # Best-effort removal of stale report assets; copytree below tolerates
        # leftovers because it is called with dirs_exist_ok=True.
        shutil.rmtree(data_dir, ignore_errors=True)
    logging.info("Data dir: " + data_dir)
    shutil.copytree("data", data_dir, dirs_exist_ok=True)

def generate_htmls_for_threads(self, output_dir, temp_dir_name):
def __generate_htmls_for_threads(self, output_dir, temp_dir_name):
self.create_xml_xsl_for_collection(temp_dir_name + "/threads",
"data/xml/threads/thread",
self.threads,
"thread")
self.generate_htmls_from_xmls_xsls(temp_dir_name + "/report.xml",
self.generate_htmls_from_xmls_xsls(self.report_xml_file,
temp_dir_name + "/threads",
output_dir + "/threads", )

def generate_htmls_for_javacores(self, output_dir, temp_dir_name):
def __generate_htmls_for_javacores(self, output_dir, temp_dir_name):
self.create_xml_xsl_for_collection(temp_dir_name + "/javacores",
"data/xml/javacores/javacore",
self.javacores,
"")
self.generate_htmls_from_xmls_xsls(temp_dir_name + "/report.xml",
self.generate_htmls_from_xmls_xsls(self.report_xml_file,
temp_dir_name + "/javacores",
output_dir + "/javacores", )

Expand Down Expand Up @@ -127,8 +160,7 @@ def create(path):
jset = JavacoreSet(path)
jset.populate_files_list()
if len(jset.files) < 1:
print("No javacores found. You need at least one javacore. Exiting with error 13")
exit(13)
raise RuntimeError("No javacores found. You need at least one javacore. Exiting with error 13")
first_javacore = jset.get_one_javacore()
jset.parse_common_data(first_javacore)
jset.parse_javacores()
Expand Down Expand Up @@ -258,8 +290,18 @@ def print_thread_states(self):
logging.debug(thread.name + "(id: " + str(thread.id) + "; hash: " + thread.get_hash() + ") " + \
"states: " + thread.get_snapshot_states())

def create_report_xml(self, output_file):
""" get all information an concatenate in an xml"""
# Assisted by WCA@IBM
# Latest GenAI contribution: ibm/granite-8b-code-instruct
def __create_report_xml(self, output_file):
"""
Generate an XML report containing information about the Javacoreset data.
Parameters:
- output_file (str): The path and filename of the output XML file.
Returns:
None
"""

logging.info("Generating report xml")

Expand Down Expand Up @@ -389,11 +431,29 @@ def create_report_xml(self, output_file):
self.doc.writexml(stream, indent=" ", addindent=" ", newl='\n', encoding="utf-8")
stream.close()
self.doc.unlink()
self.report_xml_file = output_file

logging.info("Finished generating report xml")

# Assisted by WCA@IBM
# Latest GenAI contribution: ibm/granite-8b-code-instruct
def get_javacore_set_in_xml(self):
    """
    Return the content of the XML report file.

    Reads the file referenced by self.report_xml_file in text mode and
    returns everything it contains.

    Parameters:
        self (JavacoreSet): The instance of the javacore_set class.
    Returns:
        str: The JavaCore set in the XML format.
    """
    # NOTE(review): report_xml_file is None until the report XML has been
    # created, so this presumably must be called after report generation
    # (and while that file still exists) - confirm against callers.
    # The with-block closes the file even if read() raises.
    with open(self.report_xml_file, "r") as report_file:
        return report_file.read()

@staticmethod
def create_index_html(input_dir, output_dir):
def __create_index_html(input_dir, output_dir):

# Copy index.xml and report.xsl to temp - for index.html we don't need to generate anything. Copying is enough.
shutil.copy2("data/xml/index.xml", input_dir)
Expand Down
29 changes: 26 additions & 3 deletions test/test_javacore_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from unittest.mock import patch

import javacore_analyzer
from javacore import CorruptedJavacoreException


def rm_tmp_dir():
Expand All @@ -38,13 +39,35 @@ def setUpClass(self):
";test/data/javacores/javacore.20220606.114502.32888.0002.txt", "tmp"]
self.twofilesargs_different_separator = ["javacore_analyzer",
"test/data/javacores/javacore.20220606.114458.32888.0001.txt"
":test/data/javacores/javacore.20220606.114502.32888.0002.txt", "tmp" ,
":test/data/javacores/javacore.20220606.114502.32888.0002.txt", "tmp",
"--separator", ":"]
self.issue129 = ["javacore_analyzer", "test/data/issue129", "tmp"]
self.expateerror = ["javacore_analyzer", "test/data/verboseGcJavacores", "tmp"]
self.threadnameswithquotes = ["javacore_analyzer", "test/data/quotationMarks", "tmp"]
rm_tmp_dir()

def test_api(self):
    """
    Exercise process_javacores_and_generate_report_data directly.

    Covers a zip archive, a corrupted zip archive (expected to raise
    CorruptedJavacoreException), a list of two javacore files, a directory,
    and an empty input list (expected to raise RuntimeError).
    """
    javacore_analyzer.process_javacores_and_generate_report_data(["test/data/archives/javacores.zip"], "tmp")

    # assertRaises replaces the manual try/except + flag bookkeeping; the
    # failure messages are unchanged.
    with self.assertRaises(CorruptedJavacoreException,
                           msg="API on corrupted file should fail but finished successfully"):
        javacore_analyzer.process_javacores_and_generate_report_data(
            ["test/data/archives/javacores-corrupted.zip"], "tmp")

    javacore_analyzer.process_javacores_and_generate_report_data(
        ["test/data/javacores/javacore.20220606.114458.32888.0001.txt",
         "test/data/javacores/javacore.20220606.114502.32888.0002.txt"], "tmp")
    javacore_analyzer.process_javacores_and_generate_report_data(
        ["test/data/javacores"], "tmp")

    with self.assertRaises(RuntimeError,
                           msg="API on missing javacores should fail but finished successfully"):
        javacore_analyzer.process_javacores_and_generate_report_data([], "tmp")

def test_issue129(self):
self.runMainWithParams(self.issue129)

Expand Down Expand Up @@ -140,9 +163,9 @@ def assert_data_generated_and_not_empty(self):
self.assertTrue(os.path.exists("tmp/index.html"), "index.html not generated")
self.assertTrue(os.path.getsize("tmp/index.html") > 0, "index.html file is empty")
self.assertTrue(os.path.exists("tmp/threads"))
self.assertGreaterEqual(self.number_files_in_dir("tmp/threads"),1)
self.assertGreaterEqual(self.number_files_in_dir("tmp/threads"), 1)
self.assertTrue(os.path.exists("tmp/javacores"))
self.assertGreaterEqual(self.number_files_in_dir("tmp/javacores"),1)
self.assertGreaterEqual(self.number_files_in_dir("tmp/javacores"), 1)
self.assertTrue(os.path.isfile("tmp/wait2-debug.log"))

@staticmethod
Expand Down

0 comments on commit 8773b79

Please sign in to comment.