fitit.ado

/*******************************************************************************
*                                                                              *
*             Handles fitting the models and returning results                 *
*                                                                              *
*******************************************************************************/

*! fitit
*! v 0.0.9
*! 27FEB2024

// Remove any definition of fitit already in memory before it is (re)defined 
// below; capture suppresses the error raised when no such program exists
capture program drop fitit

// Define program
//
// fitit fits the estimation command supplied by the user once per fold, 
// stores each set of estimation results, and optionally displays them.
//
// Arguments and options:
//   cmd       - the estimation command; it is handed to cmdmod, which returns 
//               the modified command(s) that are actually executed here
//   split()   - required; passed through untouched to cmdmod
//   results() - required; stub used to name the stored estimation results 
//               (stub1, stub2, ..., stuball); must not end in a digit
//   kfold()   - number of folds, defaults to 1 (simple train/test split); 
//               values below 1 are rejected with error 198
//   noall     - when kfold() > 1, skips the additional fit that is otherwise 
//               stored under the name stuball
//   display   - shows the fitted models: a single collect table on Stata 17 
//               or later, otherwise a replay of each stored result
//   name()    - name of the collection used on Stata 17 or later; defaults to 
//               xvfit
//
// Returned in e():
//   e(estres#)     - name of the stored estimation results for fold #
//   e(estresall)   - name of the stored results for the all-folds model
//   e(estresnames) - names of all stored estimation results
//   e(predifin)    - r(predifin) as returned by cmdmod
//   e(kfpredifin)  - r(kfpredifin) as returned by cmdmod
prog def fitit, eclass 

	// Version statement 
	version 15
	
	// Syntax
	syntax anything(name = cmd id="estimation command name"),				 ///
			SPLit(passthru) RESults(string asis) [ KFold(integer 1) noall 	 ///
			DISplay NAme(string asis)]

	// Check for missing name option
	if mi(`"`name'"') loc name xvfit
			
	// Test for invalid KFold option.  All of the argument checks are done 
	// before any collection is created/replaced so that an invalid call does 
	// not destroy an existing collection with the same name.
	if `kfold' < 1 {
		
		// Display an error message
		di as err "There must always be at least 1 K-Fold.  This would be "	 ///
		"the training set in a simple train/test split.  You specified "	 ///
		"`kfold' K-Folds."
		
		// Return error code and exit
		err 198
		
	} // End IF Block for invalid K-Fold argument
	
	// Test whether the results option conforms to requirements to end with a 
	// letter.  The fold number is appended to this stub to name the stored 
	// estimates, so a trailing digit is not allowed.
	if ustrregexm("`results'", "\d\$") {
		
		// Display error message
		di as err "The argument passed to results ends in a number.  The "	 ///
		"last character must not be a number for this option."
		
		// Return error code
		err 198
		
	} // End IF Block for invalid results option
	
	// Create a collection to store estimation results (Stata 17 or later only)
	if `c(stata_version)' >= 17 qui: collect create `name', replace
			
	// Create a macro to store the names of all the estimation results
	loc estres 
	
	// Call the command to generate the modified estimation command string
	cmdmod `cmd', `split' kf(`kfold')
	
	// Stores the returned modified prediction if expression so it can be 
	// returned by fitit
	loc predifin `r(predifin)'
	
	// Does the same with the macro used for the all training set component when
	// used with K-Fold CV
	loc kfpredifin `r(kfpredifin)'
	
	// Create a null local to store column names for results if displayed
	loc modord

	// Handles fitting for KFold and non-KFold CV
	forv k = 1/`kfold' {
		
		// Call the estimation command stored in the dataset characteristic when
		// it is present
		if !mi(`"`: char _dta[modcmd]'"') {
			if `c(stata_version)' >= 17 qui: collect, name(`name'):`: char _dta[modcmd]'
			else qui: `: char _dta[modcmd]'
		}
		
		// Otherwise call the returned macro from cmdmod
		// NOTE(review): r(modcmd) is re-read on every pass of the loop; this 
		// assumes the commands executed inside the loop leave r() intact -- 
		// confirm against cmdmod and the estimation commands being used.
		else {
			if `c(stata_version)' >= 17 qui: collect, name(`name'):`r(modcmd)'
			// Run quietly, consistent with every other model fit in this program
			else qui: `r(modcmd)'
		}
		
		// For simple train/test splits
		if `kfold' == 1 {
			
			// Add an appropriate title to the estimation results
			est title: Model Fit on Training Sample
		
			// Add corresponding title for display
			loc modord `modord' `k' "Training Set"
		
		} // End IF Block for simple train/test splits
		
		// Add a title for K-Fold cases
		else {
			
			// Adds an appropriate title to the estimation results
			est title: Model fit on Fold #`k'
			
			// Builds the titles for the display option
			loc modord `modord' `k' `"Fold #`k'"'
		
		} // End ELSE Block for K-Fold and LOO cases
		
		// Stores the estimation results in a more persistent way
		est sto `results'`k'
		
		// Keep the estimation result name in a fold specific macro; these are 
		// posted to e() after all of the models are fitted
		loc estres`k' "`results'`k'"
		
		// Add the name of the estimation results to the estres macro
		loc estres "`estres' `results'`k'"
			
	} // Loop over the KFolds

	// Test if K-Fold cross validation is being used and the user did not 
	// suppress the additional all-folds fit with the noall option
	if `kfold' > 1 & mi(`"`all'"') {
		
		// If the dataset characteristic is not missing
		if !mi(`"`: char _dta[kfmodcmd]'"') {
			
			// Call the estimation command stored in the characteristic
			if `c(stata_version)' >= 17 qui: collect, name(`name'):`: char _dta[kfmodcmd]'
			else qui: `: char _dta[kfmodcmd]'
		
		} // End IF Block for estimation command in characteristic
		
		// Otherwise, use the returned result from cmdmod
		else {
			if `c(stata_version)' >= 17 qui: collect, name(`name'):`r(kfmodcmd)'
			else qui: `r(kfmodcmd)'
		}
		
		// Add an appropriate title to the estimation results
		est title: Model Fitted on All Training Folds 
		
		// Adds a title for the display option; this model is collected after 
		// the per-fold models, hence the index of kfold + 1
		loc modord `modord' `= `kfold' + 1' "Whole Training Set"
		
		// Stores the estimation results in a more persistent way
		est sto `results'all
		
		// Return the estimation result name in a macro
		eret loc estresall "`results'all"
		
		// Add the name of the estimation results to the estres macro
		loc estres "`estres' `results'all"

	} // End IF Block for K-Fold CV fitting to all training data
	
	// Loop over the kfolds to return the individual stored result names
	forv k = 1/`kfold' {
		
		// Returns the individual estimation result names in their own macros
		eret loc estres`k' "`estres`k''"
		
	} // End Loop over the K-Folds to return the estimation result names
	
	// Return the names of all the stored estimation results
	eret loc estresnames "`estres'"
	
	// Return the predict macro; macval() prevents any macro references held 
	// inside the stored string from being expanded here
	eret loc predifin `macval(predifin)'
	
	// Return the predict macro for the K-Fold case on all training data
	eret loc kfpredifin `macval(kfpredifin)'
		
	// Repost the estimation results to return them to users
	ereturn repost
	
	// Check for the display option
	if !mi("`display'") {
		
		// If Stata 17 or later
		if `c(stata_version)' >= 17 {
			
			// Collects standardized results from all models
			qui: collect style autolevels result _r_b _r_se N ll ll_0 r2 	 ///
									 r2_a rmse rss mss df_m df_r F, name(`name')
										   
			// Don't display omitted levels in the results 
			qui: collect style showomit off, name(`name')
			
			// Don't display the base level of factor variables in the results
			qui: collect style showbase off, name(`name')
			
			// Don't display results for empty factor cells/interactions
			qui: collect style showempty off, name(`name')
			
			// Shows standard errors in parentheses
			qui: collect style cell result[_r_se], sformat("(%s)") name(`name')
			
			// Aligns the cell contents
			qui: collect style cell cell_type[item column-header], 			 ///
													 name(`name') halign(center)
			
			// Omits the labels for coefficients and standard errors in the output
			qui: collect style header result[_r_b _r_se], level(hide) 		 ///
														  name(`name')
			
			// Adds a little additional horizontal spacing between columns
			qui: collect style column, extraspace(1) name(`name')
			
			// Stacks the coefficients, SE, and other results and uses x as an 
			// interaction delimiter
			qui: collect style row stack, spacer delimiter(" x ") 			 ///
							 name(`name') atdelimiter(" x ") bardelimiter(" x ")
										  
			// Defines levels for significance stars and adds a note to the end of 
			// the table with the definitions
			qui: collect stars _r_p 0.001 "***" 0.01 "**" 0.05 "*", 		 ///
											  attach(_r_b) shownote name(`name')
			
			// Relabels some of the longer named model results to save space
			collect label levels result N "N" r2 "R^2" r2_a "Adj. R^2" 		 ///
										F "F stat." rss "Residual SS" 		 ///
										ll_0 "Log Likelihood, null model"    ///
										mss "Model SS", name(`name') modify

			// Sets the numeric display format for all result cells to use a comma 
			// for the thousands delimiter and to display 3 significant digits
			qui: collect style cell result, name(`name') nformat(%24.3gc)
			
			// This attaches the labels for the results created during the model 
			// fitting above to the column headers
			qui: collect label levels cmdset `modord', name(`name')
			
			// This specifies how the results should be laid out.  The interaction 
			// in the first parenthetical is how the results for the coefficients 
			// and SE get displayed as rows and the second result provides the 
			// general model fit statistics.  The second parenthetical is used to 
			// say that there will be one column per estimation command collected.
			// The collection is named explicitly, like every other collect call 
			// in this program, instead of relying on it being the current one.
			qui: collect layout (colname#result result)(cmdset), name(`name')
			
			// Display the results from the named collection
			collect preview, name(`name')
		
		} // End IF Block for Stata 17 or later
		
		// otherwise display results separately
		else {
			
			// Replay all the stored estimation results
			estimates replay `estres'
			
		} // End ELSE Block for older Stata

	} // End IF Block for display option
	
// End definition of the command
end