-
Notifications
You must be signed in to change notification settings - Fork 2
/
fitit.ado
259 lines (181 loc) · 8.28 KB
/
fitit.ado
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
/*******************************************************************************
* *
* Handles fitting the models and returning results *
* *
*******************************************************************************/
*! fitit
*! v 0.0.9
*! 27FEB2024
// Discard any copy of fitit already in memory; capture suppresses the error
// that would otherwise be raised when no such program is loaded
capture program drop fitit
// Define program
//
// fitit: runs the (modified) estimation command once per fold, stores each set
// of estimation results, and returns the stored names in e().
//
// Syntax:
//   fitit cmd, split(...) results(stub) [kfold(#) noall display name(cname)]
//
// Arguments and options:
//   cmd       - the estimation command; handed to cmdmod, which generates the
//               modified estimation command string
//   split()   - required; passed through unchanged to cmdmod
//   results() - required; stub for the stored estimation result names.  The
//               fold number (or the suffix all) is appended to it, so it must
//               not end in a digit
//   kfold()   - number of folds; defaults to 1 (simple train/test split)
//   noall     - when kfold() > 1, skip the extra fit that uses kfmodcmd
//   display   - show the results: a collect table in Stata 17 or later,
//               estimates replay otherwise
//   name()    - collection used for the results in Stata 17 or later;
//               defaults to xvfit
//
// Returned in e():
//   e(estres#)     - name of the stored results for fold #
//   e(estresall)   - name of the stored all-folds results (only when
//                    kfold() > 1 and noall is not specified)
//   e(estresnames) - names of all stored estimation results
//   e(predifin), e(kfpredifin) - values returned by cmdmod, passed along
//
// Errors:
//   198 when kfold() < 1 or when results() ends in a digit
prog def fitit, eclass

    // Version statement
    version 15

    // Syntax
    syntax anything(name = cmd id="estimation command name"),                ///
        SPLit(passthru) RESults(string asis) [ KFold(integer 1) noall        ///
        DISplay NAme(string asis)]

    // Check for missing name option
    if mi(`"`name'"') loc name xvfit

    // Test for invalid KFold option
    if `kfold' < 1 {

        // Display an error message
        di as err "There must always be at least 1 K-Fold. This would be " ///
        "the training set in a simple train/test split. You specified " ///
        "`kfold' K-Folds."

        // Return error code and exit
        err 198

    } // End IF Block for invalid K-Fold argument

    // Test whether the results option conforms to requirements to end with a
    // letter
    if ustrregexm("`results'", "\d\$") {

        // Display error message
        di as err "The argument passed to results ends in a number. The " ///
        "last character must not be a number for this option."

        // Return error code
        err 198

    } // End IF Block for invalid results option

    // Create a collection to store estimation results.  This is done only
    // after the options are validated so that an invalid call does not replace
    // an existing collection of the same name.
    if `c(stata_version)' >= 17 qui: collect create `name', replace

    // Create a macro to store the names of all the estimation results
    loc estres

    // Call the command to generate the modified estimation command string
    cmdmod `cmd', `split' kf(`kfold')

    // Stores the returned modified prediction if expression so it can be
    // returned by fitit
    loc predifin `r(predifin)'

    // Does the same with the macro used for the all training set component when
    // used with K-Fold CV
    loc kfpredifin `r(kfpredifin)'

    // Copy the command strings returned by cmdmod into locals right away.
    // r() only holds what the most recent r-class program left behind, so
    // it cannot be relied on once the estimation commands below have run.
    // Mata is used so the text is copied verbatim; any macro references
    // embedded in the strings are still expanded when the local is executed,
    // exactly as they would be when expanding r() directly.
    mata: st_local("modcmd", st_global("r(modcmd)"))
    mata: st_local("kfmodcmd", st_global("r(kfmodcmd)"))

    // Create a null local to store column names for results if displayed
    loc modord

    // Handles fitting for KFold and non-KFold CV
    forv k = 1/`kfold' {

        // Call the estimation command stored in the dataset characteristic
        // when one is present
        if !mi(`"`: char _dta[modcmd]'"') {
            if `c(stata_version)' >= 17 qui: collect, name(`name'):`: char _dta[modcmd]'
            else qui: `: char _dta[modcmd]'
        }

        // Otherwise call the command string that was returned by cmdmod
        else {
            if `c(stata_version)' >= 17 qui: collect, name(`name'):`modcmd'
            else qui: `modcmd'
        }

        // For simple train/test splits
        if `kfold' == 1 {

            // Add an appropriate title to the estimation results
            est title: Model Fit on Training Sample

            // Add corresponding title for display
            loc modord `modord' `k' "Training Set"

        } // End IF Block for simple train/test splits

        // Add a title for K-Fold cases
        else {

            // Adds an appropriate title to the estimation results
            est title: Model fit on Fold #`k'

            // Builds the titles for the display option
            loc modord `modord' `k' `"Fold #`k'"'

        } // End ELSE Block for K-Fold and LOO cases

        // Stores the estimation results in a more persistent way
        est sto `results'`k'

        // Keep the estimation result name so it can be returned in e() once
        // all of the models have been fitted
        loc estres`k' "`results'`k'"

        // Add the name of the estimation results to the estres macro
        loc estres "`estres' `results'`k'"

    } // Loop over the KFolds

    // Test if K-Fold cross validation is being used and the all-folds fit was
    // not suppressed with noall
    if `kfold' > 1 & mi(`"`all'"') {

        // If the dataset characteristic is not missing
        if !mi(`"`: char _dta[kfmodcmd]'"') {

            // Call the estimation command stored in the characteristic
            if `c(stata_version)' >= 17 qui: collect, name(`name'):`: char _dta[kfmodcmd]'
            else qui: `: char _dta[kfmodcmd]'

        } // End IF Block for estimation command in characteristic

        // Otherwise, use the command string that was returned by cmdmod
        else {
            if `c(stata_version)' >= 17 qui: collect, name(`name'):`kfmodcmd'
            else qui: `kfmodcmd'
        }

        // Add a title to the estimation results
        est title: Model Fitted on All Training Folds

        // Adds a title for the display option
        loc modord `modord' `= `kfold' + 1' "Whole Training Set"

        // Stores the estimation results in a more persistent way
        est sto `results'all

        // Return the estimation result name in a macro
        eret loc estresall "`results'all"

        // Add the name of the estimation results to the estres macro
        loc estres "`estres' `results'all"

    } // End IF Block for K-Fold CV fitting to all training data

    // Loop over the kfolds to return the individual stored result names
    forv k = 1/`kfold' {

        // Returns the individual estimation result names in their own macros
        eret loc estres`k' "`estres`k''"

    } // End Loop over the K-Folds to return the estimation result names

    // Return the names of all the stored estimation results
    eret loc estresnames "`estres'"

    // Return the predict macro; macval() keeps any macro references inside
    // the string from being expanded here
    eret loc predifin `macval(predifin)'

    // Return the predict macro for the K-Fold case on all training data
    eret loc kfpredifin `macval(kfpredifin)'

    // Repost the estimation results to return them to users
    ereturn repost

    // Check for the display option
    if !mi("`display'") {

        // If Stata 17 or later
        if `c(stata_version)' >= 17 {

            // Collects standardized results from all models
            qui: collect style autolevels result _r_b _r_se N ll ll_0 r2     ///
                r2_a rmse rss mss df_m df_r F, name(`name')

            // Don't display omitted levels in the results
            qui: collect style showomit off, name(`name')

            // Don't display the base level of factor variables in the results
            qui: collect style showbase off, name(`name')

            // Don't display results for empty factor cells/interactions
            qui: collect style showempty off, name(`name')

            // Shows standard errors in parentheses
            qui: collect style cell result[_r_se], sformat("(%s)") name(`name')

            // Aligns the cell contents
            qui: collect style cell cell_type[item column-header],           ///
                name(`name') halign(center)

            // Omits the labels for coefficients and standard errors in the
            // output
            qui: collect style header result[_r_b _r_se], level(hide)        ///
                name(`name')

            // Adds a little additional horizontal spacing between columns
            qui: collect style column, extraspace(1) name(`name')

            // Stacks the coefficients, SE, and other results and uses x as an
            // interaction delimiter
            qui: collect style row stack, spacer delimiter(" x ")            ///
                name(`name') atdelimiter(" x ") bardelimiter(" x ")

            // Defines levels for significance stars and adds a note to the end
            // of the table with the definitions
            qui: collect stars _r_p 0.001 "***" 0.01 "**" 0.05 "*",          ///
                attach(_r_b) shownote name(`name')

            // Relabels some of the longer named model results to save space
            collect label levels result N "N" r2 "R^2" r2_a "Adj. R^2"       ///
                F "F stat." rss "Residual SS"                                ///
                ll_0 "Log Likelihood, null model"                            ///
                mss "Model SS", name(`name') modify

            // Sets the numeric display format for all result cells to use a
            // comma for the thousands delimiter and to display 3 significant
            // digits
            qui: collect style cell result, name(`name') nformat(%24.3gc)

            // This attaches the labels for the results created during the model
            // fitting above to the column headers
            qui: collect label levels cmdset `modord', name(`name')

            // This specifies how the results should be laid out.  The
            // interaction in the first parenthetical is how the results for the
            // coefficients and SE get displayed as rows and the second result
            // provides the general model fit statistics.  The second
            // parenthetical is used to say that there will be one column per
            // estimation command collected.  The collection is named
            // explicitly, like every other collect call in this program, so
            // the layout does not depend on which collection is current.
            qui: collect layout (colname#result result)(cmdset), name(`name')

            // Display the results from the named collection
            collect preview, name(`name')

        } // End IF Block for Stata 17 or later

        // otherwise display results separately
        else {

            // Replay all the stored estimation results
            estimates replay `estres'

        } // End ELSE Block for older Stata

    } // End IF Block for display option

// End definition of the command
end