|
The following source code was written by Owen Emlen for BrainTech, LLC. It
can be compiled using Visual Basic.NET. The formulas and logic for calculating
Pearson's r, partial and semi-partial correlations, beta weights, an intercept,
and big R-squared for a 2-IV multiple regression equation are well commented and
documented below. This source code is also available in its original (text)
format.
NEW:
Additional (Free) source code written by Owen Emlen for learning purposes / idea exchange / free
use (Released 01/13/2007):
|
Introduction To Programming Using Visual
Basic.net
|
Imports System.Math
Imports System.Collections.Generic
Public NotInheritable Class CBasicSummaryStats
Public dMean As Double
Public dStdDev As Double
Public dSum As Double
Public dMeanSq As Double
Public dVariance As Double
End Class
Public NotInheritable Class cDataSortedByDate
Public rgDataSortedByDate As New List(Of CDataEntry)(128)
Public rgDataLookupByDate As New Dictionary(Of DateTime, CDataEntry)(128)
Public dtFirstDataEntry As DateTime
Public dtLastDataEntry As DateTime
Public summaryStats As CSummaryStats
Public Sub CalculateAllCorrelationCoeffs(ByRef
meansd As CSummaryStats)
Dim Numerator As Double
Dim Denominator As Double
Dim Denom1 As Double
Dim Denom2 As Double
With meansd
Numerator = (.dN * .dSumX1Y)
- (.X1Stats.dSum * .YStats.dSum)
Denom1 = (.dN * .dSumX1Sq)
- (.X1Stats.dSum * .X1Stats.dSum)
Denom2 = (.dN * .dSumYSq)
- (.YStats.dSum * .YStats.dSum)
Denominator = Sqrt(Denom1)
* Sqrt(Denom2)
.RYX1 = Numerator / Denominator
Numerator = (.dN * .dSumX2Y)
- (.X2Stats.dSum * .YStats.dSum)
Denom1 = (.dN * .dSumX2Sq)
- (.X2Stats.dSum * .X2Stats.dSum)
Denom2 = (.dN * .dSumYSq)
- (.YStats.dSum * .YStats.dSum)
Denominator = Sqrt(Denom1)
* Sqrt(Denom2)
.RYX2 = Numerator / Denominator
Numerator = (.dN * .dSumX1X2)
- (.X1Stats.dSum * .X2Stats.dSum)
Denom1 = (.dN * .dSumX1Sq)
- (.X1Stats.dSum * .X1Stats.dSum)
Denom2 = (.dN * .dSumX2Sq)
- (.X2Stats.dSum * .X2Stats.dSum)
Denominator = Sqrt(Denom1)
* Sqrt(Denom2)
.RX1X2 = Numerator / Denominator
Numerator = .RYX1 - (.RX1X2
* .RYX2)
Denom1 = (1 - (.RX1X2 * .RX1X2))
Denom2 = (1 - (.RYX2 * .RYX2))
Denominator = Sqrt(Denom1)
* Sqrt(Denom2)
.PartialRX1_Y_X2 = Numerator
/ Denominator
Numerator = .RYX2 - (.RX1X2
* .RYX1)
Denom1 = (1 - (.RX1X2 * .RX1X2))
Denom2 = (1 - (.RYX1 * .RYX1))
Denominator = Sqrt(Denom1)
* Sqrt(Denom2)
.PartialRX2_Y_X1 = Numerator
/ Denominator
Numerator = .RYX1 - (.RYX2
* .RX1X2)
Denom1 = (1 - (.RX1X2 * .RX1X2))
Denominator = Sqrt(Denom1)
.SemiPartialRY_X1X2 = Numerator
/ Denominator
Numerator = .RYX2 - (.RYX1
* .RX1X2)
Denom1 = (1 - (.RX1X2 * .RX1X2))
Denominator = Sqrt(Denom1)
.SemiPartialRY_X2X1 = Numerator
/ Denominator
.BetaX1 = .SemiPartialRY_X1X2
* Sqrt(1 - (.RX1X2 * .RX1X2))
.BetaX2 = .SemiPartialRY_X2X1
* Sqrt(1 - (.RX1X2 * .RX1X2))
End With
End Sub
Public Sub FindDate(ByVal dt As DateTime) As
CDataEntry
If rgDataLookupByDate.ContainsKey(dt)
= True Then
Return rgDataLookupByDate(dt)
Else
Return Nothing
End If
End Function
Public Sub CalculateSummaryStatistics()
summaryStats = New CSummaryStats
Dim nCnt As Integer = 0
Dim das As CDataEntry
nCnt = rgDataSortedByDate.Count
summaryStats.dN = CDbl(nCnt)
Dim yValue As Double
Dim x1Value As Double
Dim x2Value As Double
Dim xLinear As Double
Dim xSQ As Double
Dim x2SQ As Double
Dim ySQ As Double
Dim dtMinDt As DateTime = DateTime.MaxValue
Dim fFirst As Boolean = True
With summaryStats
For Each
das In rgDataSortedByDate
If
fFirst = True Then
dtMinDt
= das.dt
fFirst
= False
End
If
Dim ts As
TimeSpan
ts = das.dt.Date.Subtract(dtMinDt)
xLinear
= ts.TotalDays() + 1
x1Value
= xLinear * xLinear
x2Value
= GetX2FromDate(das.dt)
das.dXLinear
= xLinear
das.dX =
x1Value
das.dX2
= x2Value
yValue =
das.dY
.X1Stats.dSum
+= x1Value
.X2Stats.dSum
+= x2Value
.YStats.dSum
+= yValue
.dSumX1Y
+= x1Value * yValue
.dSumX2Y
+= x2Value * yValue
xSQ = x1Value
* x1Value
x2SQ = x2Value
* x2Value
ySQ = yValue
* yValue
.dSumX1Sq
+= xSQ
.dSumX2Sq
+= x2SQ
.dSumYSq
+= ySQ
.dSumX1X2
+= x1Value * x2Value
Next
.X1Stats.dMean = summaryStats.X1Stats.dSum
/ summaryStats.dN
.X2Stats.dMean = summaryStats.X2Stats.dSum
/ summaryStats.dN
.YStats.dMean = summaryStats.YStats.dSum
/ summaryStats.dN
.X1Stats.dMeanSq = .X1Stats.dMean
* .X1Stats.dMean
.X2Stats.dMeanSq = .X2Stats.dMean
* .X2Stats.dMean
.YStats.dMeanSq = .YStats.dMean
* .YStats.dMean
.X1Stats.dVariance = (.dSumX1Sq
/ .dN) - .X1Stats.dMeanSq
.X2Stats.dVariance = (.dSumX2Sq
/ .dN) - .X2Stats.dMeanSq
.YStats.dVariance = (.dSumYSq
/ .dN) - .YStats.dMeanSq
If .X1Stats.dVariance
<= 0 Then
.X1Stats.dStdDev
= 0
Else
.X1Stats.dStdDev
= Math.Sqrt(.X1Stats.dVariance)
End If
If .X2Stats.dVariance
<= 0 Then
.X2Stats.dStdDev
= 0
Else
.X2Stats.dStdDev
= Math.Sqrt(.X2Stats.dVariance)
End If
If .YStats.dVariance
<= 0 Then
.YStats.dStdDev
= 0
Else
.YStats.dStdDev
= Math.Sqrt(.YStats.dVariance)
End If
End With
CalculateAllCorrelationCoeffs(summaryStats)
With summaryStats
.B1 = .RYX1 * (.YStats.dStdDev
/ .X1Stats.dStdDev)
.A1 = .YStats.dMean - (.B1
* .X1Stats.dMean)
.B2 = .RYX2 * (.YStats.dStdDev
/ .X2Stats.dStdDev)
.A2 = .YStats.dMean - (.B2
* .X2Stats.dMean)
.BX1_12 = .BetaX1 * (.YStats.dStdDev
/ .X1Stats.dStdDev)
.BX2_12 = .BetaX2 * (.YStats.dStdDev
/ .X2Stats.dStdDev)
.A12 = .YStats.dMean - (.BX1_12
* .X1Stats.dMean) - (.BX2_12 * .X2Stats.dMean)
End With
End Sub
End Class
Public NotInheritable Class CSummaryStats
Public dN As Double
Public X1Stats As New CBasicSummaryStats
Public X2Stats As New CBasicSummaryStats
Public YStats As New CBasicSummaryStats
Public dSumX1Y As Double
Public dSumX1Sq As Double
Public dSumX2Y As Double
Public dSumX2Sq As Double
Public dSumYSq As Double
Public dSumX1X2 As Double
Public RYX1 As Double
Public RYX2 As Double
Public RX1X2 As Double
Public PartialRX1_Y_X2 As Double
Public PartialRX2_Y_X1 As Double
Public SemiPartialRY_X1X2 As Double
Public SemiPartialRY_X2X1 As Double
Public BetaX1 As Double
Public BetaX2 As Double
Public B1 As Double
Public A1 As Double
Public B2 As Double
Public A2 As Double
Public BX1_12 As Double
Public BX2_12 As Double
Public A12 As Double
Public Sub PredictYFromX1(ByVal Xval As Double)
As Double
Return (B1 * Xval) + A1
End Function
Public Sub PredictYFromX2(ByVal X2val As Double)
As Double
Return (B2 * X2val) + A2
End Function
Public Sub PredictYFromX1X2(ByVal X1val As Double,
ByVal X2val As Double) As Double
Return (BX1_12 * X1val) + (BX2_12 * X2val)
+ A12
End Function
End Class
Public Module SummaryStatsModule
Public Sub InitResiduals(ByVal sd As cDataSortedByDate)
Dim das As CDataEntry
With sd.summaryStats
For Each
das In sd.rgDataSortedByDate
das.dExpectedX1
= .PredictYFromX1(das.dX)
das.dResidualX1
= das.dY - das.dExpectedX1
das.dExpectedX2
= .PredictYFromX2(das.dX2)
das.dResidualX2
= das.dY - das.dExpectedX2
das.dExpectedX1X2
= .PredictYFromX1X2(das.dX, das.dX2)
das.dResidualX1X2
= das.dY - das.dExpectedX1X2
Next
End With
End Sub
Public Sub GetXLinearFromDate(ByVal dt As DateTime,
ByVal dtOldest As Date) As Double
Dim ts As TimeSpan
ts = dt.Date.Subtract(dtOldest)
Return CDbl(CInt(ts.TotalDays) + 1) * 0.1
End Function
Public Sub GetX1FromDate(ByVal dt As DateTime,
ByVal dtOldest As Date) As Double
Dim ts As TimeSpan
ts = dt.Date.Subtract(dtOldest)
Dim d As Double = CDbl(CInt(ts.TotalDays) +
1)
Return d * d * 0.01
End Function
Public Sub GetX2FromDate(ByVal dt As DateTime)
As Double
Const dWeekendVal As Double = 2.5
Const dWeekdayVal As Double = -1.0
Dim d As Double
Select Case dt.DayOfWeek
Case Is = DayOfWeek.Saturday
d = dWeekendVal
Case Is = DayOfWeek.Sunday
d = dWeekendVal
Case Else
d = dWeekdayVal
End Select
Return d
End Function
End Module
Public NotInheritable Class CDataEntry : Implements IComparable
Public dt As DateTime
Public dX As Double
Public dX2 As Double
Public dXLinear As Double
Public dY As Double
Public dResidualX1 As Double
Public dResidualX2 As Double
Public dResidualX1X2 As Double
Public dExpectedX1 As Double
Public dExpectedX2 As Double
Public dExpectedX1X2 As Double
Public Sub CompareTo(ByVal obj As Object) As Integer
_
Implements System.IComparable.CompareTo
Dim dsCompareTo As CDataEntry = DirectCast(obj,
CDataEntry)
If Me.dt > dsCompareTo.dt Then
Return 1
If Me.dt < dsCompareTo.dt
Then Return -1
Return 0
End Function
End Class