-
Notifications
You must be signed in to change notification settings - Fork 23
/
03a-meow-cleaning.Rmd
68 lines (56 loc) · 1.77 KB
/
03a-meow-cleaning.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
---
title: "Lab 03a: Animal Word Cleaning"
subtitle: "CS631"
author: "Alison Hill"
output:
  html_document:
    theme: flatly
---
```{r setup, include = FALSE, cache = FALSE}
# Global chunk defaults: keep knitting when a chunk errors (error = TRUE),
# print output without a comment prefix (comment = NA), and hide warnings and
# messages. The former `errors = FALSE` entry was dropped: the knitr option is
# spelled `error`, so the plural form is not a chunk option knitr acts on.
knitr::opts_chunk$set(
  error = TRUE,
  comment = NA,
  warning = FALSE,
  message = FALSE,
  tidy = FALSE,
  cache = FALSE
)
```
# Data
The data are pulled with the `wordbankr` package: <http://langcog.github.io/wordbankr/>
# Packages
```{r}
library(tidyverse)
library(wordbankr)
library(here)
```
```{r}
# Animal sounds to look up; matched against the `definition` column below.
my_sounds <- c("meow", "woof woof", "cockadoodledoo")

# Fetch the item data for the American English "WG" form and keep only the
# rows whose definition is one of the sounds above.
sounds <- filter(
  get_item_data(language = "English (American)", form = "WG"),
  definition %in% my_sounds
)
# Then get the instrument data for those items (requesting `administrations`
# and `iteminfo`) and derive two logical flags per row:
#   produces    - `value` is "produces"
#   understands - `value` is "produces" or "understands"
# Rows whose `produces` flag is NA are dropped, and `uni_lemma` is renamed
# to `sound`.
sounds_data <- get_instrument_data(
  language = "English (American)",
  form = "WG",
  items = sounds$item_id,
  administrations = TRUE,
  iteminfo = TRUE
) %>%
  mutate(
    produces = value == "produces",
    # `%in%` never returns NA, so this yields the same TRUE/FALSE values as
    # the previous case_when() without comparing a logical to TRUE.
    understands = value %in% c("produces", "understands")
  ) %>%
  drop_na(produces) %>%
  rename(sound = uni_lemma)
# What proportion of kids at each age understand/produce each word?
# For every age x sound combination: count the rows flagged as producing and
# as understanding, count distinct `data_id` values as respondents, and divide
# to get the two proportions.
sounds_summary <- sounds_data %>%
  group_by(age, sound) %>%
  summarise(
    kids_produce = sum(produces, na.rm = TRUE),
    kids_understand = sum(understands, na.rm = TRUE),
    kids_respond = n_distinct(data_id),
    prop_produce = kids_produce / kids_respond,
    prop_understand = kids_understand / kids_respond
  ) %>%
  # summarise() only peels off the last grouping level, so the result would
  # otherwise stay grouped by `age`; ungroup so later steps get a plain tibble.
  ungroup()
```
Now let's export both data frames for the lab.
```{r}
# Make sure the output folder exists before writing; write_csv() does not
# create missing directories. dir.create() is a no-op when it already exists.
dir.create(here::here("data"), showWarnings = FALSE, recursive = TRUE)

# Item-level responses
write_csv(sounds_data, here::here("data", "animal_sounds_data.csv"))
# Age-by-sound summary
write_csv(sounds_summary, here::here("data", "animal_sounds_summary.csv"))
```