-
Notifications
You must be signed in to change notification settings - Fork 24
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(auto-gen): detect if UCC can auto generate a file under package/default #1220
Changes from all commits
0a17375
bc53688
6a17248
34a5566
c95934f
5b4751b
efbc39c
e3d36ec
f797219
d91a97e
74221e8
3795330
6e03452
96878e4
d823de4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,256 @@ | ||
# | ||
# Copyright 2024 Splunk Inc. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# | ||
import sys | ||
from os import walk | ||
from os.path import sep | ||
from typing import List, Dict, Any, Union | ||
from logging import Logger | ||
from configparser import ConfigParser | ||
from lxml import etree, objectify | ||
|
||
if sys.version_info >= (3, 8): | ||
from typing import Literal | ||
else: | ||
from typing_extensions import Literal | ||
|
||
|
||
class CodeGeneratorDiffChecker:
    """
    Compare the files of a source directory with the files of a generated
    (target) directory and sort the ones present on both sides into:
    - ``common_files``: no difference found, keyed by the source file path
    - ``different_files``: the differences found, keyed by the source file
      path plus the stanza/attribute (conf) or the tag (XML/HTML)

    Files are matched by their base name only.
    """

    COMMON_FILES_MESSAGE_PART_1 = (
        "Below are the file(s) that are auto generated by the UCC framework. "
        "The below files can be removed from your repository:"
    )
    COMMON_FILES_MESSAGE_PART_2 = (
        "Please refer UCC framework documentation for the latest "
        "features that allows you to remove the above files."
    )
    DIFFERENT_FILES_MESSAGE = (
        "Below are the file(s) with the differences that are not auto generated by the UCC framework. "
        "(Optionally), you can raise feature requests for UCC framework at "
        "'https://github.com/splunk/addonfactory-ucc-generator/issues/new/choose' "
        "with the output mentioned below."
    )

    def __init__(self, src_dir: str, dst_dir: str) -> None:
        """
        Store the two directories to compare and start with empty results.
        """
        self.source_directory = src_dir
        self.target_directory = dst_dir
        # conf files:     {src_full_file_name[stanza]::attrib_name : {repository: value, output: value}}
        # XML/HTML files: {src_full_file_name::tag : [{repository: value, output: value}, ...]}
        self.different_files: Dict[
            str, Union[Dict[str, Any], List[Dict[str, Any]]]
        ] = {}
        # {src_full_file_name : short_file_name}
        self.common_files: Dict[str, str] = {}

    @staticmethod
    def _index_files(directory: str) -> Dict[str, str]:
        """
        Walk `directory` and map every base file name to its full path.
        A file name found in several sub-directories keeps the last path visited.
        """
        indexed: Dict[str, str] = {}
        for root, _, files in walk(directory):
            for file in files:
                indexed[file] = sep.join([root, file])
        return indexed

    def deduce_gen_and_custom_content(
        self, logger: Logger, ignore_file_list: Union[List[str], None] = None
    ) -> None:
        """
        Deduce that the files have same content or different
        - For the same content, developer can remove it from the repository
        - For the custom content, developer can raise enhancement request to UCC

        `ignore_file_list` holds base file names to skip; it is never modified.
        The result is logged through `logger` once every file has been checked.
        """
        # Work on a private copy: extending the argument in place would mutate
        # the caller's list (or a shared default) on every call.
        ignored = set(ignore_file_list or [])
        # we add these two files as they are required to be present in source code
        # TODO: try to implement generation of these files from globalConfig
        ignored.update(("app.manifest", "README.txt"))

        src_all_files = self._index_files(self.source_directory)
        dest_all_files = self._index_files(self.target_directory)
        dest_all_files["default.meta"] = sep.join([self.target_directory, "metadata"])

        for file_name, dest_path in dest_all_files.items():
            src_path = src_all_files.get(file_name)
            if src_path is None or file_name in ignored:
                continue
            if file_name.endswith(".conf"):
                self._conf_file_diff_checker(src_path, dest_path)
            elif file_name.endswith((".xml", ".html")):
                self._xml_file_diff_checker(src_path, dest_path)

        self.print_files(logger)

    def _conf_file_diff_checker(self, src_file: str, target_file: str) -> None:
        """
        Find the difference between the source code and generated code for the
        conf files created in package/default directory

        Every stanza attribute whose value differs (a missing attribute counts
        as an empty value) is recorded in `different_files`; a file without any
        difference is recorded in `common_files`.
        """
        # Interpolation is switched off: conf values may contain a literal "%"
        # (e.g. a time format), which the default interpolation rejects when
        # the value is read.
        source_config = ConfigParser(interpolation=None)
        source_config.read(src_file)
        target_config = ConfigParser(interpolation=None)
        target_config.read(target_file)

        is_file_same = True
        # Two passes so that attributes present on one side only are seen too:
        # first everything the repository defines, then everything generated.
        for reference in (source_config, target_config):
            for sect in reference.sections():
                for key in reference.options(sect):
                    repo_value = source_config.get(sect, key, fallback="")
                    output_value = target_config.get(sect, key, fallback="")
                    if repo_value == output_value:
                        continue
                    # we collect the diff found between the two files
                    self.different_files[f"{src_file}[{sect}]::{key}"] = {
                        "repository": repo_value,
                        "output": output_value,
                    }
                    is_file_same = False

        if is_file_same:
            self.common_files[src_file] = src_file.split(sep=sep)[-1]

    def _xml_file_diff_checker(self, src_file: str, target_file: str) -> None:
        """
        Find the difference between the source code and generated code for the
        XML or HTML files created in package/default/data directory

        Tag, text and attribute differences are appended to `different_files`
        under "<src_file>::<tag>". A file that cannot be parsed is reported as
        a difference. A file without any difference goes to `common_files`.
        """
        diff_count = len(self.different_files)
        parser = etree.XMLParser(remove_comments=True)
        try:
            src_tree = objectify.parse(src_file, parser=parser)
        except etree.XMLSyntaxError:
            self.different_files[src_file] = {
                "repository": "invalid XML present. Please update the source code with valid XML.",
                "output": "[unverified]",
            }
            return
        try:
            target_tree = objectify.parse(target_file, parser=parser)
        except etree.XMLSyntaxError:
            self.different_files[src_file] = {
                "repository": "[unverified]",
                "output": "invalid XML generated from globalConfig. Ensure necessary characters are escaped.",
            }
            return

        src_root = src_tree.getroot()
        target_root = target_tree.getroot()

        # remove all the code comments from the XML files, keep changes in-memory
        remove_code_comments("xml", src_root)
        remove_code_comments("xml", target_root)

        def _record(tag: Any, repository: Any, output: Any) -> None:
            # all the differences of one tag are grouped under the same key
            self.different_files.setdefault(f"{src_file}::{tag}", []).append(
                {"repository": repository, "output": output}
            )

        def _compare_elements(
            src_elem: etree._Element, target_elem: etree._Element
        ) -> None:
            if src_elem.tag != target_elem.tag:
                _record(src_elem.tag, src_elem.tag, target_elem.tag)

            if src_elem.text != target_elem.text:
                # strip the extra spaces from texts in XMLs; an element
                # without text has `text` set to None, so guard before stripping
                _record(
                    src_elem.tag,
                    (src_elem.text or "").strip(),
                    (target_elem.text or "").strip(),
                )

            if src_elem.attrib != target_elem.attrib:
                _record(src_elem.tag, src_elem.attrib, target_elem.attrib)

            # NOTE(review): zip() stops at the shorter side, so extra children
            # on either side are not reported - confirm this is intended.
            for src_child, target_child in zip(src_elem, target_elem):
                # recursively check for tags, attributes, texts of XML
                _compare_elements(src_child, target_child)

        _compare_elements(src_root, target_root)
        if diff_count == len(self.different_files):
            self.common_files[src_file] = src_file.split(sep=sep)[-1]

    def print_files(self, logger: Logger) -> None:
        """
        Print the common and different files in the console

        Each non-empty group is emitted as one multi-line warning on `logger`.
        """
        # NOTE(review): a reviewer of this change reported that the logged
        # output does not make it entirely clear what a developer should do;
        # the wording may need to be revisited.
        if self.common_files:
            messages: List[str] = ["-" * 120, self.COMMON_FILES_MESSAGE_PART_1]
            messages.extend(
                f"{idx}) {f}" for idx, f in enumerate(self.common_files, start=1)
            )
            messages.append(self.COMMON_FILES_MESSAGE_PART_2)
            messages.append("-" * 120)
            logger.warning("\n".join(messages))

        if self.different_files:
            messages = ["+" * 120, self.DIFFERENT_FILES_MESSAGE]
            for file_count, (k, v) in enumerate(
                self.different_files.items(), start=1
            ):
                file_msg = f"{file_count}) {k}"
                if isinstance(v, dict):
                    file_msg += f"\n\tSource: {v.get('repository')}, Generated: {v.get('output')}"
                elif isinstance(v, list):
                    file_msg += "".join(
                        f"\n\tSource: {iv.get('repository')}, Generated: {iv.get('output')}"
                        for iv in v
                    )
                messages.append(file_msg)

            messages.append("+" * 120)
            logger.warning("\n".join(messages))
|
||
|
||
def remove_code_comments(
    file_type: Literal["xml"], source_code: "etree._Element"
) -> "etree._Element":
    """
    Remove comments from code files before parsing them

    `source_code` is modified in place and returned. Only the "xml"
    `file_type` is supported; any other value raises an exception.
    """
    if file_type != "xml":
        raise ValueError("Unknown 'file_type' provided.")

    # "//comment()" is an absolute XPath: a single query already returns every
    # comment of the document, so it does not need to be repeated per element.
    for comment in source_code.xpath("//comment()"):
        parent = comment.getparent()
        # comments placed outside the root element have no parent to detach from
        if parent is not None:
            # NOTE(review): lxml's remove() also discards the removed node's
            # tail text - confirm that is acceptable for the comparison.
            parent.remove(comment)
    return source_code
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I extracted a library from UCC and PSA to parse .conf files; you can find it here: https://github.com/splunk/addonfactory-splunk-conf-parser-lib. It is built on top of `configparser` but supports multi-line values and comments. I think it would be better to use this one instead of plain `configparser`.