# tests for choicetools
library(choicetools)
##### IN PROGRESS TESTS
################### CBC TESTS
set.seed(4567) # make it reproducible
# PART 1: create a CBC design and simulate some choices with it
# define a CBC attribute structure with 5 attributes of 4, 4, 5, 2, and 7 levels
tmp.attrs <- c(4, 4, 5, 2, 7)
# Create a CBC design matrix for that attribute structure.
# Not needed if you are importing data from Sawtooth Software or elsewhere
tmp.tab <- generateMNLrandomTab(
tmp.attrs, # our attribute structure
respondents=200, # how many design blocks to create
cards=3, # number of concepts per trial
trials=8) # N of trials per design block
# convert those cards to a design-coded dummy matrix
tmp.des <- convertSSItoDesign(tmp.tab)
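# Optional sanity check (base R; an addition to the original flow, assuming
# convertSSItoDesign() returns one dummy-coded row per concept): we expect
# sum(tmp.attrs) = 22 columns and 200 * 8 * 3 = 4800 rows
dim(tmp.des)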
# Now let's set up a test to see if we can recover the part worths ...
# make up some zero-sum part worths that we'll test against.
# This is not needed if you're using respondent data because the whole
# point then is to estimate the part worths, not make them up!
tmp.pws <- generateRNDpws(tmp.attrs)
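# Optional check (a base-R sketch, not in the original flow): if the part
# worths are zero-sum within each attribute as described above, these
# per-attribute sums should all be ~0
tapply(tmp.pws, rep(seq_along(tmp.attrs), tmp.attrs), sum)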
# Use those known part worths to pick winning cards in the design matrix
# i.e., simulate fake respondent choices so we can see if we recover the
# part worths in estimation. Again, this is only needed for simulation purposes.
# This uses a single vector of part worths for all respondents.
tmp.win <- pickMNLwinningCards(tmp.des, tmp.pws, noise=TRUE)
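# Optional peek (base R, an addition): tabulate the simulated choices to
# see their coding and rough balance
table(tmp.win)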
# PART 2: estimate aggregate logit model and compare to known part worths
# estimate the MNL aggregate model
tmp.logit <- estimateMNLfromDesign(tmp.des, tmp.win)
# compare raw values of the PWs side by side
cbind(t(tmp.logit), tmp.pws)
# simple check of estimated PWs against the "real" PWs; we use correlation
# because MNL utilities are identified only up to a scale factor
cor.test(t(tmp.logit), tmp.pws)
# simple estimate of the scaling factor between real and estimated PWs
tmp.scale <- median(t(tmp.logit) / tmp.pws)
# compare raw scores after a simple adjustment by that scale factor
cbind(t(tmp.logit), tmp.pws * tmp.scale)
plot(t(tmp.logit), tmp.pws * tmp.scale)
# PART 3: estimate individual-level HB estimates
# note that this is less automatable than aggregate models
# so you may want to dive into the estimateMNLfromDesignHB() code
#
# run HB model for the design & its winning cards
tmp.logitHB <- estimateMNLfromDesignHB(tmp.tab, tmp.win,
kCards=3, kTrials=8, kResp=200,
mcmcIters=5000)
# get mean beta from the draws
tmp.meanbeta <- apply(tmp.logitHB$betadraw, 2, mean)
# check against the "real" pws (omit final zero-sum level of each attribute)
cor.test(tmp.meanbeta, tmp.pws[-cumsum(tmp.attrs)])
cbind(tmp.meanbeta, tmp.pws[-cumsum(tmp.attrs)],
t(tmp.logit[-cumsum(tmp.attrs)]))
plot(tmp.meanbeta, tmp.pws[-cumsum(tmp.attrs)])
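# Optional base-R sketch (an addition): rebuild the omitted levels. With the
# zero-sum (effects) coding noted above, each attribute's final level is the
# negative sum of its estimated levels, so the full vector of sum(tmp.attrs)
# part worths can be recovered from the estimated betas:
tmp.beta.split <- split(tmp.meanbeta, rep(seq_along(tmp.attrs), tmp.attrs - 1))
tmp.meanbeta.full <- unlist(lapply(tmp.beta.split, function(x) c(x, -sum(x))))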
# PART 4: look at how the market simulation works.
# Use those part worths to estimate preference share for two products.
# First let's do fake "individual level" part worths based on the true values
# (if you have real data, you'd estimate these with HB of course. This is
# just for simulation purposes.)
# Set up the matrix for them ...
tmp.ind.pws <- matrix(0, ncol=sum(tmp.attrs), nrow=200)
# and then fill with part worths plus noise
for (i in 1:200) { tmp.ind.pws[i, ] <- tmp.pws + rnorm(sum(tmp.attrs)) }
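# Equivalent vectorized construction (a base-R alternative to the loop above;
# same distribution, but different random draws, so results would differ):
# tmp.ind.pws <- matrix(tmp.pws, 200, sum(tmp.attrs), byrow=TRUE) +
#                matrix(rnorm(200 * sum(tmp.attrs)), 200)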
# and now the market simulation
# define the products according to their levels within attribute
tmp.prod1 <- c(1, 5, 10, 14, 17) # define product 1
tmp.prod2 <- c(2, 6, 11, 15, 20) # define product 2
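# Each index is a column of the dummy-coded matrix: attribute k's levels sit
# after an offset equal to the preceding attributes' level counts. E.g.,
# tmp.prod1 is levels 1, 1, 2, 1, 2 of the five attributes:
head(c(0, cumsum(tmp.attrs)), -1) + c(1, 1, 2, 1, 2) # == tmp.prod1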
# estimate share for those using the individual part worths created above
# in practice, you would use part worths from HB estimation instead
# (from "estimateMNLfromDesignHB()" here, or from Sawtooth Software)
tmp.pref <- marketSim(
tmp.ind.pws, # matrix of individual-level utilities
list(tmp.prod1, tmp.prod2), # list of products to compare
use.none=FALSE, # we have no "none" column
use.error=TRUE, draws=20, # add some noise and resample
style="first") # estimate share by MNL approach
# see the overall preference share for the two products
colMeans(tmp.pref)
# result with seed 4567 as run directly from "set.seed" line above:
# 0.27 0.73
# Now the same thing with REAL HB data from HB estimation above
# get the individual-level data from the ChoiceModelR object
tmp.ind.pws2 <- extractHBbetas(tmp.logitHB, tmp.attrs)
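# Optional shape check (an addition): expect one row per respondent and one
# column per attribute level, i.e. 200 x 22, matching tmp.ind.pws above
dim(tmp.ind.pws2)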
# and repeat the simulation with that data
tmp.pref <- marketSim(
tmp.ind.pws2, # matrix of individual-level utilities
list(tmp.prod1, tmp.prod2), # list of products to compare
use.none=FALSE, # we have no "none" column
use.error=TRUE, draws=20, # add some noise and resample
style="first") # estimate share by MNL approach
# see the overall preference share for the two products
colMeans(tmp.pref)
# result:
# 0.32 0.68 # varies from above due to vagaries of individual-level estimation
## marketSim() demonstration code -- change the first line (and the three
## lines after it, if needed), then run the code inside the {} to see how
## the function works
if (FALSE) {
some.pw.data <- matrix(rnorm(33*100), ncol=33)
MY.PWS <- some.pw.data ### REPLACE THE RIGHT-HAND SIDE WITH YOUR OWN PART WORTH SET
# test/demonstrate IIA problem
p1a <- c(3,11,21,31) # "Red bus" defined as part worth columns 3, 11 etc
p1b <- c(3,11,21,31) # "Blue bus", identical to the "Red bus"
p2 <- c(4,12,22,32) # Competition
## logit shares -- show IIA problem when an identical product is introduced and takes too much share
redbus <- marketSim(MY.PWS,list(p1a, p2), use.none=FALSE, style="logit"); colMeans(redbus);
redbluebus <- marketSim(MY.PWS,list(p1a, p1b, p2), use.none=FALSE, style="logit"); colMeans(redbluebus);
## first choice shares -- no IIA problem; share is split between identical products
redbus <- marketSim(MY.PWS,list(p1a, p2), use.none=FALSE, style="first"); colMeans(redbus);
redbluebus <- marketSim(MY.PWS,list(p1a, p1b, p2), use.none=FALSE, style="first"); colMeans(redbluebus);
## roulette shares -- regresses somewhat toward IIA vs. the first-choice model; not deterministic due to random roulette draws
redbus <- marketSim(MY.PWS,list(p1a, p2), use.none=FALSE, style="roulette"); colMeans(redbus);
redbluebus <- marketSim(MY.PWS,list(p1a, p1b, p2), use.none=FALSE, style="roulette"); colMeans(redbluebus);
}
#########################
# example code for writeCBCdesignCSV and readCBCchoices
#
if (FALSE) {
# first let's source this file to get all the functions in memory
if (FALSE) { # DEV version
current.wd <- "~/Documents/R/Rcbc/" # <<=== CHANGE FOR YOUR SYSTEM IF NEEDED
source(paste(current.wd, "Rcbc-DEV.r", sep=""))
} else {
current.wd <- "~/Downloads/" # <<=== CHANGE FOR YOUR SYSTEM IF NEEDED
source(paste(current.wd, "Rcbc.r", sep=""))
}
set.seed(4567)
# define a CBC structure and create an experimental design matrix
attr.list <- c(3, 3, 5, 5, 4) # note that this matches the list of labels below, so we know the structure
tmp.tab <- generateMNLrandomTab(attr.list, respondents=3, cards=3, trials=12) # create a random CBC design for the given attribute/level list
tmp.des <- convertSSItoDesign(tmp.tab) # extended design file we'll use later for estimation
# this example imagines we're doing a "designer USB flash drive"
#
# assign friendly names to the attributes and levels
attr.names <- c("Size", "Performance", "Design", "Memory", "Price")
# suggestion: avoid commas in the label strings; they seem OK but are not thoroughly tested across CSV upload/download, Google Drive, LibreOffice, and R
attr.labels <- c(
"Nano", "Thumb", "Full-length",
"Low speed", "Medium speed", "High speed",
"Tie-dye", "Silver", "Black", "White", "Prada",
"1.0GB", "8.0GB", "16GB", "32GB", "256GB",
"$9", "$29", "$59", "$89"
)
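# a cheap optional guard (an addition): there must be exactly one label per
# attribute level, or the labels will not line up with the design
stopifnot(length(attr.labels) == sum(attr.list))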
# write the CBC "survey" to a CSV
current.wd <- "~/Desktop/" # <<=== CHANGE FOR YOUR SYSTEM IF NEEDED
writeCBCdesignCSV(tmp.tab, attr.list=attr.list, lab.attrs=attr.names, lab.levels=attr.labels,
filename=paste(current.wd,"writeCBCtest.csv",sep=""), delimeter=",")
# to upload the CSV to Drive spreadsheet:
# 1. Go to Drive, and upload. Be sure to turn "Conversion" on.
# 2. Open it. Select first column, and then shift+rightarrow to highlight others. Expand the column widths
# 3. Suggest to center the text in columns B, C, D for easy reading
########### ... now go off and make some choices in the CSV file
########### save it and then come back here to read your choices ...
# to download from Drive:
# 1. File | Download as ... Comma Separated Values
current.wd <- "~/Downloads/" # <<=== CHANGE FOR YOUR SYSTEM IF NEEDED
# from Google Docs:
(tmp.win <- readCBCchoices(tmp.tab, filename=paste(current.wd, "writeCBCtest - Sheet 1.csv",sep="")))
# from other CSV writer:
(tmp.win <- readCBCchoices(tmp.tab, filename=paste(current.wd, "writeCBCtest.csv",sep="")))
# expand to full length and fill in any missing values with random choices
(tmp.win.exp <- expandCBCwinners(tmp.win))
# estimate the utilities from the data
tmp.pws <- estimateMNLfromDesign(tmp.des, tmp.win.exp)
(tmp.res <- data.frame(attr=attr.labels, util=t(tmp.pws)[,1])) # nicer formatting to import to a spreadsheet
# bootstrap it to get some empirical confidence
tmp.bs.log <- bootstrapMNLfromDesign(tmp.des, tmp.win.exp, cards=3, trials=12, bs.reps=20, bs.rescale=TRUE) # estimate 20 bootstrap models [in reality, should be 100-1000, not 20!]
# jitter the results for better density plotting on the chart; we jitter
# aggressively since we have a small sample
tmp.bs.log <- data.frame(apply(tmp.bs.log, 2, jitter, factor=40)) # for a larger sample, try factor=2 to 5
colnames(tmp.bs.log) <- attr.labels # make the var. names match the attribute friendly names
summary(tmp.bs.log)
# and plot the bootstrap
require(ggplot2)
require(reshape2)
tmp.bs.log.melt <- melt(tmp.bs.log) # reformat the data to be ggplot friendly
# plot it!
(p <- ggplot(tmp.bs.log.melt, aes(x=variable, y=value)) +
geom_point(colour="dark blue", alpha=0.1, size=4) +
stat_summary(fun.data = "mean_cl_normal", geom="linerange",
size=4.5, colour="darkred") +
theme(axis.text.x=element_text(angle=-90, hjust=0, size=18))
)
# if you want to import it into a preso or something ...
png(paste(current.wd,"p.png",sep=""), width=1200, height=400) # or pdf, tiff, jpeg
print(p) # print() explicitly, so the plot renders to the device inside a block
dev.off()
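# Alternative (optional): ggplot2's ggsave() manages the device itself and
# infers the format from the file extension; roughly equivalent at dpi=100:
# ggsave(paste0(current.wd, "p.png"), p, width=12, height=4, dpi=100)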
}
#############################################################################
# CPM TESTS
# EXAMPLE CODE:
# Suppose iris species are our "brands" and we examine their "positioning"
# with regard to the other predictor variables:
if (FALSE) {
data(iris)
cpm.plot(iris, "Species", names(iris)[1:4], zoom.out=10,
title.legend="Species")
# the same thing rotated, in case you want a different orientation
cpm.plot(iris, "Species", names(iris)[1:4], zoom.out=10,
title.legend="Species", rotate = 90)
}
#############################################################################
# MAXDIFF TESTS