# Load the raw data set from disk into a data frame.
data_DF <- read.csv("C:/Users/Bamb0o0o0/Desktop/dataset.csv", header = TRUE)

# Natural log of earnings.
data_DF$logearnings <- log(data_DF$earnings)

# 0/1 indicator columns occ1..occ4, one per occupation code 1..4.
data_DF[paste0("occ", 1:4)] <- lapply(
  1:4,
  function(code) as.numeric(data_DF$occ == code)
)

# 0/1 indicator columns for region codes 1..5, appended in this exact order
# (later code refers to columns by position, so the order matters).
data_DF[c("northeast", "south", "midwest", "west", "central")] <- lapply(
  1:5,
  function(code) as.numeric(data_DF$region == code)
)

# exp = age - edu - 6, where 6 is the age at which people enter primary
# school; followed by its square and cube.
data_DF$exp <- data_DF$age - data_DF$edu - 6
data_DF$exp_2 <- data_DF$exp^2
data_DF$exp_3 <- data_DF$exp^3

# Interaction term between edu and ability.
data_DF$edu_ability <- data_DF$edu * data_DF$ability

# Drop the raw columns earnings, occ and region; every other column is kept
# in its current order.
data_DF <- data_DF[
  , setdiff(names(data_DF), c("earnings", "occ", "region")),
  drop = FALSE
]

# Numeric matrix copy of the data frame, used by the bootstrap below.
data <- data.matrix(data_DF)
# Draw one bootstrap resample of the rows of a numeric matrix and fit the same
# OLS regression separately on the two groups defined by column 4.
#
# Arguments:
#   data_matrix  Numeric matrix to resample from. Defaults to the global
#                matrix `data`, so BootStrapping() with no arguments still
#                works as before.
#   sample_size  Number of rows drawn with replacement. Defaults to 10000,
#                the value that used to be hard-coded.
#                NOTE(review): a standard bootstrap draws nrow(data_matrix)
#                rows -- confirm that 10000 matches the data set size.
#
# Returns:
#   Numeric vector of length 22: intercept plus 10 slopes for the group with
#   column 4 == 1 (the "female" subsample), followed by the same 11
#   coefficients for the group with column 4 == 0 (the "male" subsample).
#   Slopes are ordered as columns 5, 11, 12, 13, 3, 2, 17, 18, 19, 14.
#
# Column positions are kept exactly as in the original code. Presumably they
# are logearnings (6) regressed on edu, northeast, south, midwest, age,
# ability, exp_2, exp_3, edu_ability and west, judging by the row labels
# applied to the replicated result -- TODO confirm against the CSV layout.
BootStrapping <- function(data_matrix = data, sample_size = 10000) {
  group_col <- 4
  response_col <- 6
  predictor_cols <- c(5, 11, 12, 13, 3, 2, 17, 18, 19, 14)

  # Fit response ~ predictors by OLS on one subsample; return coefficients.
  fit_group <- function(rows) {
    response <- rows[, response_col]
    predictors <- rows[, predictor_cols, drop = FALSE]
    coef(lm(response ~ predictors))
  }

  # Sample row POSITIONS rather than the values stored in column 1. The old
  # code used the id values themselves as row indices, which is only correct
  # when ids are exactly 1..nrow in order (in that case the draws are the
  # same as before). sample.int() also returns compact integer indices.
  row_index <- sample.int(nrow(data_matrix), sample_size, replace = TRUE)
  resample <- data_matrix[row_index, , drop = FALSE]

  # Split by the group indicator. which() skips NA indicators for both groups
  # and drop = FALSE keeps a matrix even if only one row matches.
  female_rows <- resample[which(resample[, group_col] == 1), , drop = FALSE]
  male_rows <- resample[which(resample[, group_col] == 0), , drop = FALSE]

  c(fit_group(female_rows), fit_group(male_rows))
}
# Number of bootstrap replications to run.
Numbers_of_Replication <- 1000

# Each call to BootStrapping() yields one column of 22 coefficients.
Coefficient_Matrix <- replicate(Numbers_of_Replication, BootStrapping())

# Label the rows: the 11 female-model coefficients first, then the 11
# male-model coefficients, both in the same term order.
rownames(Coefficient_Matrix) <- paste0(
  rep(c("Female_", "Male_"), each = 11),
  c("Intercept", "edu", "northeast", "south", "midwest", "age", "ability",
    "exp_2", "exp_3", "edu_ability", "west")
)

# Bootstrap standard errors: per-coefficient mean across replications,
# squared deviations from that mean, their row sums, and finally the square
# root of the sum scaled by 1 / (replications - 1).
Coefficient_Mean_Matrix <- matrix(
  rep(rowMeans(Coefficient_Matrix), times = Numbers_of_Replication),
  nrow = 22
)
Residuals_Square <- (Coefficient_Matrix - Coefficient_Mean_Matrix)^2
Residuals_Square_Sum <- rowSums(Residuals_Square)
STDER <- sqrt(Residuals_Square_Sum * (1 / (Numbers_of_Replication - 1)))
这是新改的代码，谁帮我看看还有什么地方可以改进一下？