From 7957698c75fe49b2b3e46d5c3b6407a95cd815bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20de=20Jesus=20Filho?= Date: Mon, 4 Mar 2024 16:13:32 -0300 Subject: [PATCH] Update tjsp_ler_tabela_docs_cd_processo.R --- R/tjsp_ler_tabela_docs_cd_processo.R | 48 +++++++++++++++------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/R/tjsp_ler_tabela_docs_cd_processo.R b/R/tjsp_ler_tabela_docs_cd_processo.R index 4d81478..44d94d8 100644 --- a/R/tjsp_ler_tabela_docs_cd_processo.R +++ b/R/tjsp_ler_tabela_docs_cd_processo.R @@ -7,57 +7,61 @@ #' @export #' tjsp_ler_tabela_docs_cd_processo <- function(arquivos = NULL, diretorio = "."){ - + if (is.null(arquivos)) { - + arquivos <- list.files(diretorio,full.names = TRUE) - + } - + pb <- progress::progress_bar$new(total = length(arquivos)) - + purrr::map_dfr(arquivos,purrr::possibly(~{ - + pb$tick() - - cd_processo <- stringr::str_extract(.x,"(?<=processo_)\\w+") - + + cd_processo_pg <- stringr::str_extract(.x,"(?<=processo_pg_)\\w+?(?=_)") + cd_processo_sg <- stringr::str_extract(.x,"(?<=processo_sg_)\\w+") + suppressMessages({ - + doc <- .x |> xml2::read_html() |> xml2::xml_text() |> stringr::str_extract("(?<=requestScope = )\\X+?(?=;)") |> jsonlite::fromJSON() - - + + doc_name <- tibble::tibble(doc_name= doc$data$title) |> tibble::rownames_to_column("id_doc") - + paginas <- doc$children[[2]]$data$indicePagina - + df <- purrr::imap_dfr(doc$children,~{ - + url_doc <- .x$data$parametros - + pagina_inicial <- .x$data$title |> stringr::str_extract("\\d+") - + pagina_final <- .x$data$title |> stringr::str_extract("\\d+$") - + tibble::tibble(id_doc = .y, pagina_inicial, pagina_final, url_doc) |> dplyr::mutate(id_doc = as.character(id_doc)) - + }) |> dplyr::left_join(doc_name) |> dplyr::select(id_doc, doc_name, pagina_inicial, pagina_final, url_doc) |> dplyr::mutate(url_doc = paste0("https://esaj.tjsp.jus.br/pastadigital/getPDF.do?",url_doc)) |> dplyr::group_by(id_doc) |> dplyr::ungroup() |> - tibble::add_column(cd_processo, .before =1) - + tibble::add_column(cd_processo_pg, .before =1) |> + tibble::add_column(cd_processo_sg, .after = 1) |> + dplyr::mutate(instancia = ifelse(is.na(cd_processo_sg), 1, 2), .after = 2) + }) - + }, NULL)) } +