-
Notifications
You must be signed in to change notification settings - Fork 0
/
Hackathon2020_2.R
70 lines (51 loc) · 3.22 KB
/
Hackathon2020_2.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
library(tidyverse)
library(rvest)
library(lubridate)
# Fetch the two HTML message pages from the course repository.
messages1 <- read_html("https://raw.githubusercontent.com/r-classes/dh-hackathon-2019-2020-june-annaoskina/master/messages.html")
messages2 <- read_html("https://raw.githubusercontent.com/r-classes/dh-hackathon-2019-2020-june-annaoskina/master/messages2.html")

# Character vectors holding the text of every `div.body` node, one element
# per node, in document order.
one <- html_text(html_nodes(messages1, "div.body"))
two <- html_text(html_nodes(messages2, "div.body"))
# Tabulate the texts of the second page (enframe() puts them in `value`) and
# tag each row with a date of the form "<day> <English month> <20xx>" found in
# its text. Rows without such a date inherit the most recent one above them.
two_df <- enframe(two) %>%
  mutate(
    date = str_extract(value, "[1-3]?[0-9]\\s[a-zA-Z]{3,9}\\s20[12][0-9]")
  ) %>%
  fill(date)
# Tabulate the texts of the first page (enframe() puts them in `value`) and
# tag each row with a date of the form "<day> <English month> <20xx>" found in
# its text. Rows without such a date inherit the most recent one above them.
#
# The result is stored as `one_df`, which the rest of the script reads. The
# former trailing `mutate(text = str_match(""))` step was dropped: it called
# str_match() without a pattern and therefore always raised an error.
one_df <- one %>%
  enframe() %>%
  mutate(
    date = str_extract(value, "[1-3]?[0-9]\\s[a-zA-Z]{3,9}\\s20[12][0-9]")
  ) %>%
  fill(date)
# Exploratory check on the first 20 texts: print only the capture group, i.e.
# whatever stands between the Aries heading ("Овен"/"ОВЕН") and the Taurus
# heading ("Телец"/"ТЕЛЕЦ"). Column 1 of the match matrix is the whole match,
# so column 2 is selected.
select(
  as_tibble(
    str_match(
      one_df$value[1:20],
      "(?:(?:Овен)|(?:ОВЕН))(.*)(?:(?:Телец)|(?:ТЕЛЕЦ))"
    )
  ),
  2
)
# Split every horoscope post into one column per zodiac sign ----

# The pattern anchors on the "Гороскоп на" header and on each of the twelve
# sign names (title case or upper case). Its 13 capture groups are, in order,
# the text right after the header and the text following each sign name.
horoscope_pattern <- "(?:Гороскоп\\sна)(.*)(?:(?:Овен)|(?:ОВЕН))(.*)(?:(?:Телец)|(?:ТЕЛЕЦ))(.*)(?:(?:Близнецы)|(?:БЛИЗНЕЦЫ))(.*)(?:(?:Рак)|(?:РАК))(.*)(?:(?:Лев)|(?:ЛЕВ))(.*)(?:(?:Дева)|(?:ДЕВА))(.*)(?:(?:Весы)|(?:ВЕСЫ))(.*)(?:(?:Скорпион)|(?:СКОРПИОН))(.*)(?:(?:Стрелец)|(?:СТРЕЛЕЦ))(.*)(?:(?:Козерог)|(?:КОЗЕРОГ))(.*)(?:(?:Водолей)|(?:ВОДОЛЕЙ))(.*)(?:(?:Рыбы)|(?:РЫБЫ))(.*)"

# Match a character vector against `horoscope_pattern`.
#
# Returns a tibble with one row per element of `text` and columns V1 (whole
# match) to V14 (13 capture groups); elements that do not match yield a row of
# NA. The columns are named explicitly so the V1..V14 names do not depend on
# as_tibble()'s deprecated handling of matrices without column names.
split_by_sign <- function(text) {
  parts <- str_match(text, horoscope_pattern)
  colnames(parts) <- paste0("V", seq_len(ncol(parts)))
  as_tibble(parts)
}

# str_match() must receive the character column, not the whole data frame.
# Non-matching rows are kept here as NA rows.
one_new <- split_by_sign(one_df$value)
two_new <- split_by_sign(two_df$value)

# TODO: give V3:V14 readable names, e.g. "Oven", "Telets", "Bliznetsy", "Rak",
# "Lev", "Deva", "Vesy", "Skorpion", "Strelets", "Kozerog", "Vodoley", "Ryby".

# Exploratory: show the Aries text next to each message. `[, 2]` keeps only the
# capture group; without it the new column would hold the whole two-column
# match matrix.
one_df %>%
  mutate(oven = str_match(value, "(?:ОВЕН)(.*)(?:(?:Телец))")[, 2]) %>%
  fill(oven)

# `frame_two` was built with exactly the same steps as `two_df`, so reuse it.
frame_two <- two_df
# Build the final per-sign table from the split posts.
#
# `V2` is the first capture group of the sign-splitting regex (the text right
# after the "Гороскоп на" header); a "<day> <Cyrillic word>" date is pulled out
# of it and carried down to rows that have none. Columns 3 to 15 are then
# kept, which drops V1 and V2 and leaves V3..V14 plus `date`, and every row
# containing an NA is removed.
frame_with_data <- one_new %>%
  mutate(date = str_extract(V2, "[1-3]?[0-9]\\s[а-яА-Я]{3,9}")) %>%
  fill(date) %>%
  select(3:15) %>%
  na.omit()
# Save the per-sign table as a tab-separated file (path is relative to the
# working directory).
write_tsv(frame_with_data, "hackathon.tsv")

# TODO: plot the results. The previous draft, `ggplot(aes(data, ))`, handed the
# aesthetic mapping to ggplot() as its `data` argument and added no geom, so it
# could only stop the script with an error. It is left out until the intended
# chart is decided; the expected shape is
#   ggplot(frame_with_data, aes(x = <column>, y = <column>)) + geom_<type>()