Let’s create some data.

Here’s a positive relationship.

j = 50

a = data.frame(x=1:100, y=jitter(1:100, j))
plot(a)

Here’s a negative relationship.

b = data.frame(x=101:200, y=jitter(100:1, j))
bb = data.frame(x=101:200, y=jitter(seq(50,0.5,-0.5), 100))
plot(bb)

Are these any different?

boxplot(list(before=a$y,after=bb$y))

t.test(a$y,b$y)
## 
##  Welch Two Sample t-test
## 
## data:  a$y and b$y
## t = -0.0049783, df = 197.72, p-value = 0.996
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -8.343723  8.301703
## sample estimates:
## mean of x mean of y 
##  51.07787  51.09888

Let’s display them side by side.

plot(x=1:200, y=rep(1,200), type="n", ylim=c(0,100), 
     xlab="time", ylab="y")
abline(v=100)
points(a$x, a$y, pch=2, col=2)
points(bb$x, bb$y, pch=3, col=4)

abline(lm(y~x, data=a), col=2)
lines(x=1:100, y=lm(y~x, data=a)$fit, col=2)
# abline(lm(y~x, data=bb), col=4)
lines(x=101:200, y=lm(y~x, data=bb)$fit, col=4)

Let’s simulate a change in level.

a2 = data.frame(x=101:200, y=jitter(1:100, j))

plot(x=1:200, y=rep(1,200), type="n", ylim=c(-50,150), 
     xlab="time", ylab="y")
abline(v=100)
points(a$x, a$y, pch=2, col=2)
points(a2$x, a2$y, pch=3, col=4)

abline(lm(y~x, data=a), col=2)
abline(lm(y~x, data=a2), col=4)

We can’t capture that with a simple test.

boxplot(list(before=a$y,after=a2$y))

t.test(a$y,a2$y)
## 
##  Welch Two Sample t-test
## 
## data:  a$y and a2$y
## t = 0.24522, df = 197.98, p-value = 0.8065
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -7.115953  9.137014
## sample estimates:
## mean of x mean of y 
##  51.07787  50.06734

Now let’s go back to the previous example:

m = rbind(a, data.frame(x=101:200, y=jitter(seq(50,0.5,-0.5), j)))
plot(m$x, m$y, xlab="time", ylab="y")

Here’s what a simple model might look like:

summary(lm(y~x, data=m))
## 
## Call:
## lm(formula = y ~ x, data = m)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -53.354 -16.932  -3.208  10.347  68.753 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 51.74621    3.62894  14.259  < 2e-16 ***
## x           -0.13607    0.03131  -4.346 2.22e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 25.56 on 198 degrees of freedom
## Multiple R-squared:  0.08708,    Adjusted R-squared:  0.08247 
## F-statistic: 18.89 on 1 and 198 DF,  p-value: 2.217e-05

Let’s see if we can model those trends and change in level explicitly.

m$time = m$x
m$intervention = m$time > 100
m$time_after_intervention = ifelse(m$time > 100, m$time - 100, 0)

m$time
##   [1]   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18
##  [19]  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36
##  [37]  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54
##  [55]  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72
##  [73]  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90
##  [91]  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108
## [109] 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126
## [127] 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
## [145] 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162
## [163] 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
## [181] 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198
## [199] 199 200
m$intervention
##   [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [97] FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [109]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [121]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [133]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [145]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [157]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [169]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [181]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [193]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
m$time_after_intervention
##   [1]   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##  [19]   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##  [37]   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##  [55]   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##  [73]   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##  [91]   0   0   0   0   0   0   0   0   0   0   1   2   3   4   5   6   7   8
## [109]   9  10  11  12  13  14  15  16  17  18  19  20  21  22  23  24  25  26
## [127]  27  28  29  30  31  32  33  34  35  36  37  38  39  40  41  42  43  44
## [145]  45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60  61  62
## [163]  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77  78  79  80
## [181]  81  82  83  84  85  86  87  88  89  90  91  92  93  94  95  96  97  98
## [199]  99 100
rdd = lm(y ~ time + intervention + time_after_intervention, data=m)
summary(rdd)
## 
## Call:
## lm(formula = y ~ time + intervention + time_after_intervention, 
##     data = m)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.0891  -3.2357   0.1754   3.3189   9.2600 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               1.14244    0.93352   1.224    0.222    
## time                      0.98882    0.01605  61.614   <2e-16 ***
## interventionTRUE        -48.87539    1.31041 -37.298   <2e-16 ***
## time_after_intervention  -1.50534    0.02270 -66.325   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.633 on 196 degrees of freedom
## Multiple R-squared:  0.9703, Adjusted R-squared:  0.9699 
## F-statistic:  2136 on 3 and 196 DF,  p-value: < 2.2e-16

Q: Can you achieve the same result (i.e., capture both trends and the change in level) with only two variables? A: Yes, with an interaction term!

rdd2 = lm(y ~ time * intervention, data=m)
summary(rdd2)
## 
## Call:
## lm(formula = y ~ time * intervention, data = m)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.0891  -3.2357   0.1754   3.3189   9.2600 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             1.14244    0.93352   1.224    0.222    
## time                    0.98882    0.01605  61.614   <2e-16 ***
## interventionTRUE      101.65813    2.63057  38.645   <2e-16 ***
## time:interventionTRUE  -1.50534    0.02270 -66.325   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.633 on 196 degrees of freedom
## Multiple R-squared:  0.9703, Adjusted R-squared:  0.9699 
## F-statistic:  2136 on 3 and 196 DF,  p-value: < 2.2e-16