From sasCommunity
<
User:Statcompute
Statcompute's Blog
/************************************************
* ARMA - GARCH ESTIMATION USING PROC MODEL *
************************************************/
data garch;
lu = 0;
lh = 0;
ly = 0;
do i = 1 to 1000;
h = 0.3 + 0.4 * lu ** 2 + 0.5 * lh;
u = sqrt(h) * rannor(1);
y = 1 + 0.6 * (ly - 1) + u - 0.7 * lu;
lu = u;
lh = h;
ly = y;
output;
end;
run;
proc model data = garch;
parms ar1 ma1 mu;
y = mu + ar1 * zlag1(y - mu) + ma1 * zlag1(resid.y);
fit y / method = marquardt fiml white;
/* Nonlinear FIML Parameter Estimates
Approx Approx
Parameter Estimate Std Err t Value Pr > |t|
ar1 0.777183 0.0702 11.07 <.0001
ma1 0.841133 0.0639 13.16 <.0001
mu 1.020479 0.0392 26.01 <.0001
RESULT: ESTIMATION OF ARMA(1, 1) WITHOUT GARCH COMPONENT.
Heteroscedasticity Test
Equation Test Statistic DF Pr > ChiSq
y White's Test 89.85 9 <.0001
RESULT: WHITE'S TEST SUGGESTS HETEROSCEKASTICITY.
*/
test "AR1 = 0.6" ar1 = 0.6;
test "MA1 = 0.7" ma1 = 0.7;
/* Test Results
Test Type Statistic Pr > ChiSq
AR1 = 0.6 Wald 6.37 0.0116
MA1 = 0.7 Wald 4.88 0.0272
RESULT: ESTIMATES ARE DIFFERENT FROM SIMULATION PARAMETERS.
*/
run;
quit;
proc model data = garch;
parms ar1 ma1 mu arch0 arch1 garch1;
y = mu + ar1 * zlag1(y - mu) + ma1 * zlag1(resid.y);
h.y = arch0 + arch1 * xlag(resid.y ** 2, mse.y) +
garch1 * xlag(h.y, mse.y);
fit y / method = marquardt fiml out = forecast outall;
/* Nonlinear FIML Parameter Estimates
Approx Approx
Parameter Estimate Std Err t Value Pr > |t|
ar1 0.51709 0.1036 4.99 <.0001
ma1 0.658866 0.0866 7.61 <.0001
mu 0.989957 0.0257 38.54 <.0001
arch0 0.391227 0.0702 5.58 <.0001
arch1 0.398557 0.0539 7.39 <.0001
garch1 0.464724 0.0530 8.76 <.0001
RESULT: ESTIMATION OF ARMA(1, 1) WITH GARCH(1, 1).
*/
test "AR1 = 0.6" ar1 = 0.6;
test "MA1 = 0.7" ma1 = 0.7;
test "ARCH1 = 0.4" arch1 = 0.4;
test "GARCH1 = 0.5" garch1 = 0.5;
/* Test Results
Test Type Statistic Pr > ChiSq
AR1 = 0.6 Wald 0.64 0.4237
MA1 = 0.7 Wald 0.23 0.6347
ARCH1 = 0.4 Wald 0.00 0.9786
GARCH1 = 0.5 Wald 0.44 0.5059
RESULT: ESTIMATES ARE IN LINE WITH SIMULATION PARAMETERS.
*/
run;
quit;
|
/****************************************************************
* AN ARIMA MODEL FOR MONTHLY AVERAGE OF DAILY YIELDS ON MOODY'S *
* YIELDS ON MOODY'S AAA BONDS USING BOX-JENKINS METHOD *
*****************************************************************
* DATA IS FROM FTP.SAS.COM *
****************************************************************/
proc arima data = aaa;
identify var = aaa stationarity = (adf = 12);
/*
Autocorrelations
Lag Covariance Correlation -1 9 8 7 6 5 4 3 2 1 0 1 2 3 4 5 6 7 8 9 1
0 0.695786 1.00000 | |********************|
1 0.653869 0.93976 | . |******************* |
2 0.601293 0.86419 | . |***************** |
3 0.538163 0.77346 | . |*************** |
4 0.478751 0.68807 | . |************** |
RESULT: ACF WITH SLOW DECAY SUGGESTS NONSTATIONARITY.
Augmented Dickey-Fuller Unit Root Tests
Type Lags Rho Pr < Rho Tau Pr < Tau
Zero Mean 0 -0.3090 0.6095 -2.46 0.0146
1 -0.3280 0.6051 -1.56 0.1107
RESULT: UNIT-ROOT CANNOT BE REJECTED IN ADF TEST, INDICATING NONSTATIONARITY.
*/
identify var = aaa(1) stationarity = (adf = 12) minic p = 3 q = 3;
/*
Autocorrelations
Lag Covariance Correlation -1 9 8 7 6 5 4 3 2 1 0 1 2 3 4 5 6 7 8 9 1
0 0.019445 1.00000 | |********************|
1 0.0083451 0.42917 | . |********* |
2 -0.0017824 -.09167 | . **| . |
3 -0.0063956 -.32891 | *******| . |
4 -0.0028971 -.14899 | . ***| . |
RESULT: SPIKE AT LAG 1 IN ACF SUGGESTS MA(1) COMPONENT
Partial Autocorrelations
Lag Correlation -1 9 8 7 6 5 4 3 2 1 0 1 2 3 4 5 6 7 8 9 1
1 0.42917 | . |********* |
2 -0.33813 | *******| . |
3 -0.18152 | .****| . |
4 0.10901 | . |** . |
RESULT: SPIKES AT LAG 1 AND 2 IN PACF SUGGESTS AR(2) COMPONENTS.
Augmented Dickey-Fuller Unit Root Tests
Type Lags Rho Pr < Rho Tau Pr < Tau
Zero Mean 0 -29.7742 <.0001 -4.44 <.0001
1 -52.8587 <.0001 -5.06 <.0001
RESULT: UNIT-ROOT IS REJECTED IN ADF TEST, SUGGESTING STATIONARITY.
Autocorrelation Check for White Noise
To Chi- Pr >
Lag Square DF ChiSq ---------------Autocorrelations---------------
6 21.23 6 0.0017 0.429 -0.092 -0.329 -0.149 0.040 0.106
12 26.73 12 0.0085 -0.023 -0.047 -0.193 -0.152 -0.016 0.109
RESULT: WHITE NOICE IS REJECTED, INDICATING THE SERIES IS NOT WHITE NOICE.
Minimum Information Criterion
Lags MA 0 MA 1 MA 2 MA 3
AR 0 -4.19533 -4.32352 -4.26792 -4.25599
AR 1 -4.31697 -4.27034 -4.21377 -4.21214
... ...
Minimum Table Value: BIC(2,0) = -4.34467
RESULT: BIC SUGGESTS AN ARIMA(2, 1, 0) MODEL.
*/
estimate p = 2 method = ml;
/*
Maximum Likelihood Estimation
Standard Approx
Parameter Estimate Error t Value Pr > |t| Lag
MU -0.04910 0.01998 -2.46 0.0140 0
AR1,1 0.57453 0.12507 4.59 <.0001 1
AR1,2 -0.36273 0.13038 -2.78 0.0054 2
RESULT: ESTIMATION OF ARIMA(2, 1, 0) MODEL WITH SIGNIFICANT COMPONENTS
Autocorrelation Check of Residuals
To Chi- Pr >
Lag Square DF ChiSq ---------------Autocorrelations---------------
6 4.04 4 0.4011 -0.059 0.114 -0.178 0.024 -0.026 0.113
12 9.99 10 0.4418 -0.178 0.119 -0.188 -0.046 -0.018 0.020
RESULT: WHITE NOICE CANNOT BE REJECTED IN RESIDUALS CHECK, INDICATING A
SUFFICIENT FIT
*/
forecast lead = 3 id = date interval = month out = predicted;
/*
Forecasts for variable aaa
Obs Forecast Std Error 95% Confidence Limits
61 6.7970 0.1210 6.5598 7.0341
... ...
RESULT: FORECAST 3 MONTHS AHEAD AND OUTPUT PREDICTION TO A NEW DATA SET
*/
run;
quit;
|
/*************************************************
* MODEL ZERO-INFLATED POISSON REGRESSION USING *
* PROC NLMIXED AND PROC COUNTREG *
*************************************************/
data data (keep = x1 x2 y);
do i = 1 to 1000;
x1 = rannor(1);
x2 = rannor(0);
eta_lambda = 0.1 + x1 * 0.2 + x2 * 0.3;
lambda = exp(eta_lambda);
eta_prob = 0.4 + 0.5 * x1 + 0.6 * x2;
p_0 = exp(eta_prob) / (1 + exp(eta_prob));
zero_inflate = ranbin(0, 1, p_0);
if zero_inflate = 1 then y = 0;
else y = ranpoi(0, lambda);
output;
end;
run;
proc nlmixed data = data corr cov;
parms poisn_beta0 = 0 poisn_beta1 = 0 poisn_beta2 = 0
logit_beta0 = 0 logit_beta1 = 0 logit_beta2 = 0;
eta_prob = logit_beta0 + logit_beta1 * x1 + logit_beta2 * x2;
p_0 = exp(eta_prob) / (1 + exp(eta_prob));
eta_lambda = poisn_beta0 + poisn_beta1 * x1 + poisn_beta2 * x2;
lambda = exp(eta_lambda);
if y = 0 then loglike = log(p_0 + (1 - p_0) * exp(-lambda));
else loglike = log(1 - p_0) + y * log(lambda) - lambda - lgamma(y + 1);
model y ~ general(loglike);
run;
/* Parameter Estimates
Standard
Parameter Estimate Error DF t Value Pr > |t|
poisn_beta0 0.1965 0.06748 1000 2.91 0.0037
poisn_beta1 0.1183 0.06279 1000 1.88 0.0598
poisn_beta2 0.3587 0.07127 1000 5.03 <.0001
logit_beta0 0.3539 0.1253 1000 2.82 0.0048
logit_beta1 0.5757 0.1222 1000 4.71 <.0001
logit_beta2 0.7726 0.1538 1000 5.02 <.0001
... ...
*/
proc countreg data = data type = zip printall;
model y = x1 x2 / zi(link = logistic, var = x1 x2);
run;
/* Parameter Estimates
Standard Approx
Parameter Estimate Error t Value Pr > |t|
Intercept 0.196469 0.067476 2.91 0.0036
x1 0.118323 0.062792 1.88 0.0595
x2 0.358667 0.071274 5.03 <.0001
Inf_Intercept 0.353878 0.125317 2.82 0.0047
Inf_x1 0.575729 0.122236 4.71 <.0001
Inf_x2 0.772563 0.153843 5.02 <.0001
... ...
*/
|
/************************************************
* TWO GARCH MODELLING STRATEGIES IN SAS *
* USING PROC AUTOREG AND PROC MODEL *
************************************************/
data garch;
lu = 2;
lh = 2;
do i = 1 to 1000;
x = ranuni(1);
h = 0.3 + 0.4 * lu ** 2 + 0.5 * lh;
u = sqrt(h) * rannor(1);
y = 1 + 2 * x + u;
lu = u;
lh = h;
output;
end;
run;
proc autoreg data = garch;
Model1: model y = x / archtest;
/*
Q and LM Tests for ARCH Disturbances
Order Q Pr > Q LM Pr > LM
1 255.7878 <.0001 255.0606 <.0001
2 305.8608 <.0001 256.4065 <.0001
3 393.1746 <.0001 309.4953 <.0001
... ...
RESULT: WHITE NOICE TEST OF RESIDUAL IS REJECTED.
*/
Model2: model y = x / garch = (p = 1, q = 1);
run;
/*
GARCH Estimates
SSE 3462.09943 Observations 1000
MSE 3.46210 Uncond Var 2.62942257
Log Likelihood -1711.9614 Total R-Square 0.0888
SBC 3458.46151 AIC 3433.92273
Normality Test 0.7415 Pr > ChiSq 0.6902
RESULT: SBC AND AIC ARE USED TO SELECT MODELS.
NORMALITY OF RESIDUAL CANNOT BE REJECTED.
Standard Approx
Variable DF Estimate Error t Value Pr > |t|
Intercept 1 0.9223 0.0741 12.44 <.0001
x 1 2.0864 0.1243 16.78 <.0001
ARCH0 1 0.3286 0.0580 5.67 <.0001
ARCH1 1 0.3996 0.0563 7.10 <.0001
GARCH1 1 0.4754 0.0545 8.72 <.0001
RESULT: PARAMETERS ESTIMATED WITH STATISTICS TEST.
*/
proc model data = garch;
/* INITIALIZE THE PARAMETERS */
parms arch0 0.1 arch1 0.1 garch1 0.1 beta 0.1 ;
/* SPECIFY A MEAN MODEL */
y = intercept + beta * x;
/* SPECIFY A VARIANCE MODEL */
h.y = arch0 + arch1 * xlag(resid.y ** 2, mse.y) +
garch1 * xlag(h.y, mse.y);
/* FIT THE FINAL MODEL */
fit y / method = marquardt fiml;
run;
quit;
/*
The Equation to Estimate is
y = F(beta(x), intercept(1))
VAR(y) = H(arch0, arch1, garch1, beta, intercept)
Nonlinear FIML Parameter Estimates
Approx Approx
Parameter Estimate Std Err t Value Pr > |t|
arch0 0.328547 0.0556 5.91 <.0001
arch1 0.400868 0.0519 7.72 <.0001
garch1 0.474583 0.0502 9.45 <.0001
beta 2.085598 0.1250 16.69 <.0001
intercept 0.922616 0.0726 12.71 <.0001
RESULT: PARAMETERS ESTIMATED WITH STATISTICS TEST
*/
|
##################################################
# KEEP THE 1ST RECORD IN LIST BY THE SORT INDEX #
##################################################
from random import *
List = []
seed(1)
for i in xrange(10):
List.append([randint(1, 3), 'ROW' + str(i)])
print "ORIGINAL LIST"
print "+" + 15 * "-" + "+"
for row in List:
print "|" + str(row[0]).center(4) + "|" + row[1].center(10) + "|"
print "+" + 15 * "-" + "+"
"""
ORIGINAL LIST
+---------------+
| 1 | ROW0 |
| 3 | ROW1 |
| 3 | ROW2 |
| 1 | ROW3 |
| 2 | ROW4 |
| 2 | ROW5 |
| 2 | ROW6 |
| 3 | ROW7 |
| 1 | ROW8 |
| 1 | ROW9 |
+---------------+
"""
SortList = []
for i in xrange(len(List)):
SortList.append([List[i][0], i, List[i]])
SortList.sort()
KeepList = []
for i in set(x[0] for x in SortList):
index = [i, min(x[1] for x in SortList if x[0] == i)]
KeepList.extend(row[2] for row in SortList if row[:2] == index)
print "RECORDS LEFT"
print "+" + 15 * "-" + "+"
for row in KeepList:
print "|" + str(row[0]).center(4) + "|" + row[1].center(10) + "|"
print "+" + 15 * "-" + "+"
"""
RECORDS LEFT
+---------------+
| 1 | ROW0 |
| 2 | ROW4 |
| 3 | ROW1 |
+---------------+
"""
|
/************************************************
* A DEMO OF HOW TO FIND THE LAMBDA IN BOX-COX *
* TRANSFORMATION OF THE DEPENDENT VARIABLE *
************************************************/
/* SIMULATE A DATA THAT NEEDS LOG TRANSFORMATION */
data reg;
do i = 1 to 100;
x1 = ranuni(1);
x2 = ranuni(2);
y = exp(3 * x1 + 2 * x2 + rannor(1));
output;
end;
run;
/* HOW TO FIND THE LAMBDA IN BOXCOX TRANSFORMATION */
ods output boxcox = lambda;
proc transreg data = reg details;
model boxcox(y / lambda = -2 to 2 by 0.4
convenient
parameter = 1)
= identity(x1) identity(x2);
run;
/* SHOW THE LAMBDA WITH HIGHEST LOG LIKELIHOOD */
goptions reset = global;
symbol1 c = green l = 1 i = spline w = 1 v = circle cv = red h = 1;
title h = 2 c = blue 'BOXCOX TRANSFORMATION: LAMBDA vs. LOGLIKE';
proc gplot data = lambda;
plot loglike * lambda;
run;
quit;
|
/************************************************
* COMPUTE GRANGER-CAUSALITY TEST IN A VAR MODEL *
* USING PROC VARMAX *
************************************************/
/* SIMULATE A VAR SERIES */
proc iml;
sig = 100 * i(2);
phi = {-0.2 0.1, 0.5 0.2, 0.8 0.7, -0.4 0.6};
call varmasim(y, phi) sigma = sig n = 100 seed = 1;
cn = {'y1' 'y2'};
create ts1 from y[colname = cn];
append from y;
quit;
/* TEST FOR UNIT ROOT */
proc varmax data = ts1;
model y1 y2 / p = 2 dftest;
run;
/*
Dickey-Fuller Unit Root Tests
Variable Type Rho Pr < Rho Tau Pr < Tau
y1 Zero Mean -1.77 0.3577 -0.93 0.3125
Single Mean -8.20 0.1970 -2.23 0.1960
Trend -7.07 0.6459 -1.69 0.7491
y2 Zero Mean -9.17 0.0336 -2.10 0.0350
Single Mean -40.92 0.0009 -4.23 0.0010
Trend -41.19 0.0003 -4.09 0.0090
RESULT: NULL HYPOTHESIS OF UNIT ROOT CAN'T BE REJECTED
*/
/* TEST FOR UNIT ROOT AND CAUSALITY AFTER DIFFERENCING */
proc varmax data = ts1;
/* DIFFERENCING VARIABLES USING DIFY OPTION */
model y1 y2 / p = 2 dftest dify = (1);
/* SPECIFY CAUSAL STATEMENT FOR CAUSALITY TEST */
causal group1 = (y1) group2 = (y2);
run;
/*
Dickey-Fuller Unit Root Tests
Variable Type Rho Pr < Rho Tau Pr < Tau
y1 Zero Mean -357.22 0.0001 -13.32 <.0001
Single Mean -358.43 0.0001 -13.28 <.0001
Trend -393.25 0.0001 -13.91 <.0001
y2 Zero Mean -725.38 0.0001 -18.14 <.0001
Single Mean -725.39 0.0001 -18.05 <.0001
Trend -744.93 0.0001 -18.20 <.0001
RESULT: NULL HYPOTHESIS OF UNIT ROOT IS REJECTED
Granger-Causality Wald Test
Test DF Chi-Square Pr > ChiSq
1 2 40.30 <.0001
Test 1: Group 1 Variables: y1
Group 2 Variables: y2
RESULT: NULL HYPOTHESIS OF Y1 NOT CAUSED BY Y2 IS REJECTED
*/
|
/************************************************
* USE NEXT METHOD OF HITER OBJECT TO ACCESS THE *
* NEXT RECORD IN THE COLUMN *
************************************************/
data one;
do x = 1 to 10;
output;
end;
run;
data _null_;
if _n_ = 1 then do;
declare hash h (dataset: 'one', hashexp: 4, ordered: 'a');
h.definekey ('x');
h.definedata ('x');
h.definedone();
declare hiter iter('h');
end;
do rc = iter.next() by 0 while (rc = 0);
set one;
current = x;
/* KEY STEP: MOVE TO NEXT RECORD */
rc = iter.next();
next = x;
if rc = 0 then do;
put ' CURRENT RECORD = ' current ' NEXT RECORD =' next;
end;
end;
run;
|