Skip to content

Commit

Permalink
Merge pull request #561 from r-world-devs/400-allow-define-orgs-and-r…
Browse files Browse the repository at this point in the history
…epos-in-set_host

Allow setting search scope to both orgs and repositories
  • Loading branch information
maciekbanas authored Dec 20, 2024
2 parents 4c2bff0 + 88ed0d6 commit f16a1cd
Show file tree
Hide file tree
Showing 53 changed files with 2,644 additions and 1,057 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: GitStats
Title: Standardized Git Repository Data
Version: 2.1.2.9003
Version: 2.1.2.9004
Authors@R: c(
person(given = "Maciej", family = "Banas", email = "[email protected]", role = c("aut", "cre")),
person(given = "Kamil", family = "Koziej", email = "[email protected]", role = "aut"),
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## Features:

- From now on it is possible to pass `orgs` and `repos` in `set_*_host()` functions ([#400](https://github.com/r-world-devs/GitStats/issues/400)).
- Improved `get_commits_stats()` function ([#556](https://github.com/r-world-devs/GitStats/issues/556), [#557](https://github.com/r-world-devs/GitStats/issues/557)) with:
- giving the possibility to customize the grouping variable by passing it with the `group_var` parameter,
- changing the name of the `time_interval` parameter to `time_aggregation`,
Expand All @@ -13,6 +14,7 @@
- Fixed pulling commits for GitLab subgroups when repositories are set as scope to scan ([#551](https://github.com/r-world-devs/GitStats/issues/551)).
- Filled in more information on `author_name` and `author_login` when it was missing in `commits_table` ([#550](https://github.com/r-world-devs/GitStats/issues/550)).
- Handled a `GraphQL` response error when pulling repositories by raising an R error. Previously, `GitStats` just returned an empty table with no clue as to what had happened, because errors from `GraphQL` are returned as list outputs (they do not break the code).
- Fixed getting R package usage when repositories are set ([#548](https://github.com/r-world-devs/GitStats/issues/548)).

# GitStats 2.1.2

Expand Down
5 changes: 1 addition & 4 deletions R/EngineGraphQL.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ EngineGraphQL <- R6::R6Class(
},

get_path_from_files_structure = function(host_files_structure,
only_text_files,
org,
repo = NULL) {
if (is.null(repo)) {
Expand All @@ -85,9 +84,7 @@ EngineGraphQL <- R6::R6Class(
} else {
file_path <- host_files_structure[[org]][[repo]]
}
if (only_text_files) {
file_path <- file_path[!grepl(non_text_files_pattern, file_path)]
}
file_path <- file_path[grepl(text_files_pattern, file_path)]
return(file_path)
}
)
Expand Down
38 changes: 17 additions & 21 deletions R/EngineGraphQLGitHub.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ EngineGraphQLGitHub <- R6::R6Class(
while (next_page) {
repos_response <- private$get_repos_page(
login = org,
type = type,
type = type,
repo_cursor = repo_cursor
)
repositories <- if (type == "organization") {
Expand Down Expand Up @@ -175,9 +175,8 @@ EngineGraphQLGitHub <- R6::R6Class(
get_files_from_org = function(org,
type,
repos,
file_paths,
host_files_structure,
only_text_files,
file_paths = NULL,
host_files_structure = NULL,
verbose = TRUE,
progress = TRUE) {
repo_data <- private$get_repos_data(
Expand All @@ -193,7 +192,6 @@ EngineGraphQLGitHub <- R6::R6Class(
org = org,
file_paths = file_paths,
host_files_structure = host_files_structure,
only_text_files = only_text_files,
progress = progress
)
names(org_files_list) <- repositories
Expand All @@ -204,7 +202,7 @@ EngineGraphQLGitHub <- R6::R6Class(
},

# Prepare files table.
prepare_files_table = function(files_response, org, file_path) {
prepare_files_table = function(files_response, org) {
if (!is.null(files_response)) {
files_table <- purrr::map(files_response, function(repository) {
purrr::imap(repository, function(file_data, file_name) {
Expand All @@ -230,10 +228,10 @@ EngineGraphQLGitHub <- R6::R6Class(
# Pull all files from all repositories of an organization.
get_files_structure_from_org = function(org,
type,
repos,
pattern = NULL,
depth = Inf,
verbose = FALSE,
repos = NULL,
pattern = NULL,
depth = Inf,
verbose = FALSE,
progress = TRUE) {
repo_data <- private$get_repos_data(
org = org,
Expand Down Expand Up @@ -297,8 +295,8 @@ EngineGraphQLGitHub <- R6::R6Class(
},

# Prepare releases table.
prepare_releases_table = function(releases_response, org, date_from, date_until) {
if (!is.null(releases_response)) {
prepare_releases_table = function(releases_response, org, since, until) {
if (length(releases_response) > 0) {
releases_table <-
purrr::map(releases_response, function(release) {
release_table <- purrr::map(release$data$repository$releases$nodes, function(node) {
Expand All @@ -310,7 +308,7 @@ EngineGraphQLGitHub <- R6::R6Class(
release_log = node$description
)
}) %>%
purrr::list_rbind() %>%
purrr::list_rbind() |>
dplyr::mutate(
repo_name = release$data$repository$name,
repo_url = release$data$repository$url
Expand All @@ -321,14 +319,14 @@ EngineGraphQLGitHub <- R6::R6Class(
)
return(release_table)
}) %>%
purrr::list_rbind() %>%
purrr::list_rbind() |>
dplyr::filter(
published_at <= as.POSIXct(date_until)
published_at <= as.POSIXct(until)
)
if (!is.null(date_from)) {
if (!is.null(since)) {
releases_table <- releases_table %>%
dplyr::filter(
published_at >= as.POSIXct(date_from)
published_at >= as.POSIXct(since)
)
}
} else {
Expand Down Expand Up @@ -453,19 +451,17 @@ EngineGraphQLGitHub <- R6::R6Class(
def_branches,
org,
host_files_structure,
only_text_files,
file_paths,
progress) {
purrr::map2(repositories, def_branches, function(repo, def_branch) {
if (!is.null(host_files_structure)) {
file_paths <- private$get_path_from_files_structure(
host_files_structure = host_files_structure,
only_text_files = only_text_files,
org = org,
repo = repo
)
} else if (is.null(host_files_structure) && only_text_files) {
file_paths <- file_paths[!grepl(non_text_files_pattern, file_paths)]
} else if (is.null(host_files_structure)) {
file_paths <- file_paths[grepl(text_files_pattern, file_paths)]
}
repo_files_list <- purrr::map(file_paths, function(file_path) {
private$get_file_response(
Expand Down
57 changes: 26 additions & 31 deletions R/EngineGraphQLGitLab.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,23 +82,24 @@ EngineGraphQLGitLab <- R6::R6Class(
if (length(repos_list) > 0) {
repos_table <- purrr::map(repos_list, function(repo) {
repo <- repo$node
repo$default_branch <- repo$repository$rootRef %||% ""
repo[["repo_id"]] <- sub(".*/(\\d+)$", "\\1", repo$repo_id)
repo[["default_branch"]] <- repo$repository$rootRef %||% ""
repo$repository <- NULL
repo$languages <- if (length(repo$languages) > 0) {
repo[["languages"]] <- if (length(repo$languages) > 0) {
purrr::map_chr(repo$languages, ~ .$name) %>%
paste0(collapse = ", ")
} else {
""
}
repo$created_at <- gts_to_posixt(repo$created_at)
repo$issues_open <- repo$issues$opened
repo$issues_closed <- repo$issues$closed
repo[["created_at"]] <- gts_to_posixt(repo$created_at)
repo[["issues_open"]] <- repo$issues$opened
repo[["issues_closed"]] <- repo$issues$closed
repo$issues <- NULL
repo$last_activity_at <- as.POSIXct(repo$last_activity_at)
repo$organization <- repo$namespace$path
repo[["last_activity_at"]] <- as.POSIXct(repo$last_activity_at)
repo[["organization"]] <- repo$namespace$path
repo$namespace <- NULL
repo$repo_path <- NULL # temporary to close issue 338
data.frame(repo)
return(data.frame(repo))
}) %>%
purrr::list_rbind() %>%
dplyr::relocate(
Expand All @@ -124,9 +125,8 @@ EngineGraphQLGitLab <- R6::R6Class(
get_files_from_org = function(org,
type,
repos,
file_paths,
host_files_structure,
only_text_files,
file_paths = NULL,
host_files_structure = NULL,
verbose = FALSE,
progress = FALSE) {
org <- URLdecode(org)
Expand All @@ -136,11 +136,10 @@ EngineGraphQLGitLab <- R6::R6Class(
if (!is.null(host_files_structure)) {
file_paths <- private$get_path_from_files_structure(
host_files_structure = host_files_structure,
only_text_files = only_text_files,
org = org
)
} else if (is.null(host_files_structure) && only_text_files) {
file_paths <- file_paths[!grepl(non_text_files_pattern, file_paths)]
} else {
file_paths <- file_paths[grepl(text_files_pattern, file_paths)]
}
if (type == "organization") {
while (next_page) {
Expand Down Expand Up @@ -177,7 +176,6 @@ EngineGraphQLGitLab <- R6::R6Class(
repos = repos,
file_paths = file_paths,
host_files_structure = host_files_structure,
only_text_files = only_text_files,
verbose = verbose,
progress = progress
)
Expand All @@ -194,13 +192,13 @@ EngineGraphQLGitLab <- R6::R6Class(
purrr::discard(~ length(.$repository$blobs$nodes) == 0)
if (is.null(files_list)) files_list <- list()
if (length(files_list) > 0) {
next_page <- files_response$pageInfo$hasNextPage
next_page <- projects$pageInfo$hasNextPage
} else {
next_page <- FALSE
}
if (is.null(next_page)) next_page <- FALSE
if (next_page) {
end_cursor <- files_response$pageInfo$endCursor
end_cursor <- projects$pageInfo$endCursor
} else {
end_cursor <- ""
}
Expand All @@ -219,7 +217,6 @@ EngineGraphQLGitLab <- R6::R6Class(
repos = repos,
file_paths = file_paths,
host_files_structure = host_files_structure,
only_text_files = only_text_files,
verbose = verbose,
progress = progress
)
Expand All @@ -235,7 +232,6 @@ EngineGraphQLGitLab <- R6::R6Class(
repos,
file_paths = NULL,
host_files_structure = NULL,
only_text_files = TRUE,
verbose = FALSE,
progress = FALSE) {
if (is.null(repos)) {
Expand All @@ -250,16 +246,15 @@ EngineGraphQLGitLab <- R6::R6Class(
if (!is.null(host_files_structure)) {
file_paths <- private$get_path_from_files_structure(
host_files_structure = host_files_structure,
only_text_files = only_text_files,
org = org,
repo = repo
org = org,
repo = repo
)
}
files_response <- tryCatch(
{
private$get_file_blobs_response(
org = org,
repo = repo,
org = org,
repo = repo,
file_paths = file_paths
)
},
Expand All @@ -272,7 +267,7 @@ EngineGraphQLGitLab <- R6::R6Class(
},

# Prepare files table.
prepare_files_table = function(files_response, org, file_path) {
prepare_files_table = function(files_response, org) {
if (!is.null(files_response)) {
if (private$response_prepared_by_iteration(files_response)) {
files_table <- purrr::map(files_response, function(response_data) {
Expand Down Expand Up @@ -315,7 +310,7 @@ EngineGraphQLGitLab <- R6::R6Class(

get_files_structure_from_org = function(org,
type,
repos,
repos = NULL,
pattern = NULL,
depth = Inf,
verbose = TRUE,
Expand Down Expand Up @@ -368,7 +363,7 @@ EngineGraphQLGitLab <- R6::R6Class(
response <- self$gql_response(
gql_query = releases_from_repo_query,
vars = list(
"project_path" = utils::URLdecode(repository)
"project_path" = paste0(org, "/", utils::URLdecode(repository))
)
)
return(response)
Expand All @@ -378,7 +373,7 @@ EngineGraphQLGitLab <- R6::R6Class(
},

# Prepare releases table.
prepare_releases_table = function(releases_response, org, date_from, date_until) {
prepare_releases_table = function(releases_response, org, since, until) {
if (length(releases_response) > 0) {
releases_table <-
purrr::map(releases_response, function(release) {
Expand All @@ -404,12 +399,12 @@ EngineGraphQLGitLab <- R6::R6Class(
}) %>%
purrr::list_rbind() %>%
dplyr::filter(
published_at <= as.POSIXct(date_until)
published_at <= as.POSIXct(until)
)
if (!is.null(date_from)) {
if (!is.null(since)) {
releases_table <- releases_table %>%
dplyr::filter(
published_at >= as.POSIXct(date_from)
published_at >= as.POSIXct(since)
)
}
} else {
Expand Down
Loading

0 comments on commit f16a1cd

Please sign in to comment.