Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(auto-gen): detect if UCC can auto generate a file under package/default #1220

Closed
wants to merge 15 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ pip-wheel-metadata/UNKNOWN.dist-info/top_level.txt
pip-wheel-metadata/UNKNOWN.dist-info/METADATA
.vscode/settings.json
.DS_Store
.venv
# Ignore multiple venv
.venv*
output
# The following files should never be checked into git but can not be in the
# ignore file due to poetry issues
Expand Down Expand Up @@ -45,4 +46,4 @@ init_addon_for_ucc_package/
# UI build
# ignore everything except redirect_page.js
splunk_add_on_ucc_framework/package/appserver/static/js/build/
!splunk_add_on_ucc_framework/package/appserver/static/js/build/redirect_page.js
!splunk_add_on_ucc_framework/package/appserver/static/js/build/redirect_page.js
1 change: 1 addition & 0 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ The `splunk-add-on-ucc-framework`:
* it possibly extends the UI with custom codes (for more information, see [here](custom_ui_extensions/custom_hook.md)).
* it possibly extends the build process via an `additional_packaging.py` file (for more information, see [here](additional_packaging.md)).
* generates the necessary files defined for the Alert Action, if defined in globalConfig (for more information, see [here](alert_actions/index.md)).
* provides a list of the duplicate code in your source that can be safely removed because the UCC framework generates it. Optionally, you can report the customizations you still need so that the UCC framework can implement them, which helps all Splunk add-on developers. Note: this currently supports only files created in the `package/default` directory.

## Installation

Expand Down
2 changes: 1 addition & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ defusedxml = "^0.7.1"
requests = "^2.31.0"
urllib3 = "<2"
colorama = "^0.4.6"
lxml = "4.9.4"

[tool.poetry.group.dev.dependencies]
mkdocs = "^1.4.2"
Expand Down
256 changes: 256 additions & 0 deletions splunk_add_on_ucc_framework/auto_gen_comparator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
#
# Copyright 2024 Splunk Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import sys
from os import walk
from os.path import sep
from typing import List, Dict, Any, Union
from logging import Logger
from configparser import ConfigParser
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I extracted a library from UCC and PSA to parse .conf files, you can find it here - https://github.com/splunk/addonfactory-splunk-conf-parser-lib. It is built on top of configparser but supports multi lines and comments. I think it would be better to use this one instead of plain configparser.

from lxml import etree, objectify

if sys.version_info >= (3, 8):
from typing import Literal
else:
from typing_extensions import Literal


class CodeGeneratorDiffChecker:
    """
    Compare an add-on's source files with the files generated by UCC.

    Results are accumulated on the instance in ``common_files`` (source files
    whose content matches the generated output) and ``different_files``
    (per-attribute or per-element differences).
    """

    COMMON_FILES_MESSAGE_PART_1 = (
        "Below are the file(s) that are auto generated by the UCC framework. "
        "The below files can be removed from your repository:"
    )
    COMMON_FILES_MESSAGE_PART_2 = (
        "Please refer UCC framework documentation for the latest "
        "features that allows you to remove the above files."
    )
    DIFFERENT_FILES_MESSAGE = (
        "Below are the file(s) with the differences that are not auto generated by the UCC framework. "
        "(Optionally), you can raise feature requests for UCC framework at "
        "'https://github.com/splunk/addonfactory-ucc-generator/issues/new/choose' "
        "with the output mentioned below."
    )

    def __init__(self, src_dir: str, dst_dir: str) -> None:
        """
        Store the two directories to compare and start with empty results.

        :param src_dir: directory holding the add-on source code
        :param dst_dir: directory holding the generated output
        """
        self.source_directory = src_dir
        self.target_directory = dst_dir
        # {src_full_file_name::attrib_name : value}, where value is either a
        # single {repository: value, output: value} dict or a list of such
        # dicts. `Any` cannot be subscripted, so the two shapes are described
        # here rather than in the annotation.
        self.different_files: Dict[str, Any] = {}
        # {src_full_file_name : short_file_name}
        self.common_files: Dict[str, str] = {}

def deduce_gen_and_custom_content(
    self, logger: Logger, ignore_file_list: Union[List[str], None] = None
) -> None:
    """
    Deduce whether the source files have the same content as the generated ones.

    - For the same content, developer can remove it from the repository
    - For the custom content, developer can raise enhancement request to UCC

    :param logger: logger that receives the final report
    :param ignore_file_list: file names to leave out of the comparison; the
        list is only read, never modified
    """
    # Work on a private copy: the caller's list (or a shared default) must not
    # grow on every call.
    ignored = set(ignore_file_list or ())
    # we add these two files as they are required to be present in source code
    # TODO: try to implement generation of these files from globalConfig
    ignored.update(("app.manifest", "README.txt"))

    # Files are indexed by bare file name; if the same name occurs in several
    # sub-directories, the last one visited by walk() wins.
    src_all_files: Dict[str, str] = {}
    for root, _, files in walk(self.source_directory):
        for file in files:
            src_all_files[file] = sep.join([root, file])

    dest_all_files: Dict[str, str] = {}
    for root, _, files in walk(self.target_directory):
        for file in files:
            dest_all_files[file] = sep.join([root, file])
    # Kept from the original implementation. It does not influence the
    # comparison below, which only looks at .conf, .xml and .html names.
    dest_all_files["default.meta"] = sep.join([self.target_directory, "metadata"])

    for file_name, dest_path in dest_all_files.items():
        if file_name not in src_all_files or file_name in ignored:
            continue
        if file_name.endswith(".conf"):
            self._conf_file_diff_checker(src_all_files[file_name], dest_path)
        elif file_name.endswith((".xml", ".html")):
            self._xml_file_diff_checker(src_all_files[file_name], dest_path)

    self.print_files(logger)

def _conf_file_diff_checker(self, src_file: str, target_file: str) -> None:
    """
    Find the difference between the source code and generated code for the
    conf files created in package/default directory.

    Every stanza attribute whose value differs between the two files (an
    attribute missing on one side counts as an empty string) is stored in
    ``self.different_files`` under ``"<src_file>[<stanza>]::<attribute>"``.
    When nothing differs, the file is added to ``self.common_files``.

    :param src_file: path of the .conf file in the source code
    :param target_file: path of the generated .conf file
    """

    def _load(path: str) -> ConfigParser:
        # interpolation=None: "%" is literal text in these files (for example
        # a "%Y-%m-%d" time format) and must not be expanded or rejected.
        # strict=False: a repeated stanza or key is merged instead of raising.
        config = ConfigParser(interpolation=None, strict=False)
        config.read(path)
        return config

    source_config = _load(src_file)
    target_config = _load(target_file)

    has_difference = False
    # Walk the attributes of both files so that an attribute present on only
    # one side is still reported.
    for config in (source_config, target_config):
        for sect in config.sections():
            for key in config.options(sect):
                repository_value = source_config.get(sect, key, fallback="")
                output_value = target_config.get(sect, key, fallback="")
                if repository_value == output_value:
                    continue
                # we collect the diff found between the two files
                self.different_files[f"{src_file}[{sect}]::{key}"] = {
                    "repository": repository_value,
                    "output": output_value,
                }
                has_difference = True

    if not has_difference:
        self.common_files[src_file] = src_file.split(sep=sep)[-1]

def _xml_file_diff_checker(self, src_file: str, target_file: str) -> None:
    """
    Find the difference between the source code and generated code for the
    XML or HTML files created in package/default/data directory.

    Tags, text, attributes and the number of child elements are compared
    recursively. Differences are appended to ``self.different_files`` under
    ``"<src_file>::<tag>"``. When no difference is recorded, the file is added
    to ``self.common_files``. A file that cannot be parsed is reported as a
    difference under ``src_file`` itself.

    :param src_file: path of the file in the source code
    :param target_file: path of the generated file
    """
    diff_count = len(self.different_files)
    parser = etree.XMLParser(remove_comments=True)
    try:
        src_tree = objectify.parse(src_file, parser=parser)
    except etree.XMLSyntaxError:
        self.different_files[src_file] = {
            "repository": "invalid XML present. Please update the source code with valid XML.",
            "output": "[unverified]",
        }
        return
    try:
        target_tree = objectify.parse(target_file, parser=parser)
    except etree.XMLSyntaxError:
        self.different_files[src_file] = {
            "repository": "[unverified]",
            "output": "invalid XML generated from globalConfig. Ensure necessary characters are escaped.",
        }
        return

    # remove all the code comments from the XML files, keep changes in-memory
    src_root = remove_code_comments("xml", src_tree.getroot())
    target_root = remove_code_comments("xml", target_tree.getroot())

    def _record(tag: Any, repository: Any, output: Any) -> None:
        # All differences found on elements with the same tag share one list.
        self.different_files.setdefault(f"{src_file}::{tag}", []).append(
            {"repository": repository, "output": output}
        )

    def _compare_elements(
        src_elem: etree._Element, target_elem: etree._Element
    ) -> None:
        if src_elem.tag != target_elem.tag:
            _record(src_elem.tag, src_elem.tag, target_elem.tag)

        # An element without text has text None; treat it like empty text and
        # ignore surrounding whitespace so indentation is not reported.
        src_text = (src_elem.text or "").strip()
        target_text = (target_elem.text or "").strip()
        if src_text != target_text:
            _record(src_elem.tag, src_text, target_text)

        if src_elem.attrib != target_elem.attrib:
            _record(src_elem.tag, dict(src_elem.attrib), dict(target_elem.attrib))

        src_children = list(src_elem)
        target_children = list(target_elem)
        if len(src_children) != len(target_children):
            # zip() below stops at the shorter side; surplus children would
            # otherwise go unnoticed and the file would look identical.
            _record(
                src_elem.tag,
                f"{len(src_children)} child element(s)",
                f"{len(target_children)} child element(s)",
            )

        for src_child, target_child in zip(src_children, target_children):
            # recursively check for tags, attributes, texts of XML
            _compare_elements(src_child, target_child)

    _compare_elements(src_root, target_root)
    if diff_count == len(self.different_files):
        self.common_files[src_file] = src_file.split(sep=sep)[-1]

def print_files(self, logger: Logger) -> None:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I checked out your branch and run the following command:

poetry run ucc-gen build --source ../splunk-add-on-for-google-workspace/package

Below is the logging that we provide out of this feature. I think we need to adjust this as it is not entirely clear what a developer should do.

WARNING: ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Below are the file(s) with the differences that are not auto generated by the UCC framework. (Optionally), you can raise feature requests for UCC framework at 'https://github.com/splunk/addonfactory-ucc-generator/issues/new/choose' with the output mentioned below.
1) ../splunk-add-on-for-google-workspace/package/default/inputs.conf[activity_report]::interval
	Source: 3600, Generated:
2) ../splunk-add-on-for-google-workspace/package/default/inputs.conf[activity_report]::lookbackoffset
	Source: 10800, Generated:
3) ../splunk-add-on-for-google-workspace/package/default/inputs.conf[gws_gmail_logs]::interval
	Source: 3600, Generated:
4) ../splunk-add-on-for-google-workspace/package/default/inputs.conf[gws_gmail_logs_migrated]::interval
	Source: 3600, Generated:
5) ../splunk-add-on-for-google-workspace/package/default/inputs.conf[gws_user_identity]::interval
	Source: 3600, Generated:
6) ../splunk-add-on-for-google-workspace/package/default/inputs.conf[gws_alert_center]::interval
	Source: 3600, Generated:
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The output points the file name with the stanza and property, the Source means what was present in the source code/ repository and Generated means the code that was generated by ucc-gen command.
Here, the interval property in the inputs.conf was present in the source code, but wasn't generated by ucc-gen, hence the Generated value is empty.

"""
Print the common and different files in the console
"""
# Both reports are emitted through `logger.warning`, each as one multi-line
# message framed by a 120-character separator line.
messages: List[str] = []
if self.common_files:
# Report 1: numbered list of the keys of `self.common_files`, framed by "-".
messages.append("-" * 120)
messages.append(self.COMMON_FILES_MESSAGE_PART_1)
messages.extend(
[f"{idx + 1}) {f}" for idx, f in enumerate(self.common_files.keys())]
)
messages.append(self.COMMON_FILES_MESSAGE_PART_2)
messages.append("-" * 120)
logger.warning("\n".join(messages))

# The same list object is reused for the second report.
messages.clear()

if self.different_files:
# Report 2: numbered list of the keys of `self.different_files`, framed by "+".
messages.append("+" * 120)
messages.append(self.DIFFERENT_FILES_MESSAGE)
file_count = 1
for k, v in self.different_files.items():
# file_diff_count = 1
file_msg: str = ""
file_msg = f"{file_count}) {k}"
# A value is either one dict or a list of dicts; each dict contributes one
# "Source: ..., Generated: ..." line built from its 'repository' and
# 'output' entries.
if isinstance(v, dict):
file_msg += f"\n\tSource: {v.get('repository')}, Generated: {v.get('output')}"
elif isinstance(v, list):
file_msg += "".join(
[
f"\n\tSource: {iv.get('repository')}, Generated: {iv.get('output')}"
for iv in v
]
)
messages.append(file_msg)
file_count += 1

messages.append("+" * 120)
logger.warning("\n".join(messages))


def remove_code_comments(
    file_type: Literal["xml"], source_code: "etree._Element"
) -> "etree._Element":
    """
    Remove comments from code files before parsing them.

    The comments are removed in place and the same object is returned.

    :param file_type: kind of content held by ``source_code``; only "xml" is
        supported
    :param source_code: parsed XML whose comment nodes should be dropped
    :return: ``source_code`` without its comment nodes
    :raises ValueError: if ``file_type`` is not supported
    """
    if file_type != "xml":
        raise ValueError("Unknown 'file_type' provided.")

    # "//comment()" is an absolute path, so a single query already returns
    # every comment of the document; there is no need to repeat it per element.
    for comment in source_code.xpath("//comment()"):
        parent = comment.getparent()
        if parent is None:
            # A comment that sits outside the root element has no parent to be
            # removed from.
            continue
        if comment.tail:
            # Removing a node also drops the text that follows it, so hand
            # that text over to the preceding sibling or to the parent first.
            previous = comment.getprevious()
            if previous is not None:
                previous.tail = (previous.tail or "") + comment.tail
            else:
                parent.text = (parent.text or "") + comment.tail
        parent.remove(comment)
    return source_code
14 changes: 12 additions & 2 deletions splunk_add_on_ucc_framework/commands/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
utils,
)
from splunk_add_on_ucc_framework import dashboard
from splunk_add_on_ucc_framework.auto_gen_comparator import CodeGeneratorDiffChecker
from splunk_add_on_ucc_framework import app_conf as app_conf_lib
from splunk_add_on_ucc_framework import meta_conf as meta_conf_lib
from splunk_add_on_ucc_framework import server_conf as server_conf_lib
Expand Down Expand Up @@ -525,6 +526,7 @@ def generate(
app_manifest = _get_app_manifest(source)
ta_name = app_manifest.get_addon_name()

auto_gen_ignore_list = []
gc_path = _get_and_check_global_config_path(source, config_path)
if gc_path:
logger.info(f"Using globalConfig file located @ {gc_path}")
Expand Down Expand Up @@ -604,8 +606,10 @@ def generate(
_add_modular_input(ta_name, global_config, output_directory)
if global_config.has_alerts():
logger.info("Generating alerts code")
alert_builder.generate_alerts(
global_config, ta_name, internal_root_dir, output_directory
auto_gen_ignore_list.extend(
alert_builder.generate_alerts(
global_config, ta_name, internal_root_dir, output_directory
)
)

conf_file_names = []
Expand Down Expand Up @@ -670,6 +674,12 @@ def generate(
removed_list = _remove_listed_files(ignore_list)
if removed_list:
logger.info("Removed:\n{}".format("\n".join(removed_list)))

comparator = CodeGeneratorDiffChecker(
source, os.path.join(output_directory, ta_name)
)
comparator.deduce_gen_and_custom_content(logger, auto_gen_ignore_list)

utils.recursive_overwrite(source, os.path.join(output_directory, ta_name))
logger.info("Copied package directory")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import logging
from os import linesep, makedirs, path as op
import shutil
from typing import Dict, Any
from typing import Dict, Any, List

from jinja2 import Environment, FileSystemLoader

Expand Down Expand Up @@ -64,6 +64,7 @@ def __init__(
"payload_format": "json",
"icon_path": "alerticon.png",
}
self.alerts_icon_list: List[str] = []

def get_local_conf_file_path(self, conf_name: str) -> str:
local_path = op.join(self._package_path, "default")
Expand Down Expand Up @@ -114,6 +115,7 @@ def generate_conf(self) -> None:
elif k == "alert_props":
if alert.get("iconFileName", "alerticon.png") != "alerticon.png":
alert["alert_props"]["icon_path"] = alert["iconFileName"]
self.alerts_icon_list.append(alert["iconFileName"])
else:
# we copy UCC framework's alerticon.png only when a custom isn't provided
shutil.copy(
Expand Down Expand Up @@ -250,12 +252,13 @@ def generate_spec(self) -> None:
+ 'object="alert_actions.conf.spec", object_type="file"'
)

def handle(self) -> None:
def handle(self) -> List[str]:
# Run the generation steps in order: default settings, conf, spec,
# eventtypes and tags.
# Returns `self.alerts_icon_list`, the list of icon file names collected on
# this instance, so that the caller can make use of it.
self.add_default_settings()
self.generate_conf()
self.generate_spec()
self.generate_eventtypes()
self.generate_tags()
return self.alerts_icon_list

def add_default_settings(self) -> None:
for alert in self._alert_settings:
Expand Down
Loading
Loading