3 Data structures

In R we have objects which are functions and objects which are data.

  • Function examples:
    • sin()

    • integrate()

    • plot()

    • paste()

  • Data examples:
    • 42

    • 1:5

    • "R"

    • matrix(1:12, nrow=4, ncol=3)

    • data.frame(a=1:5, tmt=c("a","b","a","b","a"))

    • list(x=2, y="abc", z=1:10)

3.1 Vector

> # Vector of numbers, e.g:
> c(1,1.2,pi,exp(1))
## [1] 1.000 1.200 3.142 2.718
> 
> # We can have vectors of other things too, e.g:
> c(TRUE,1==2)
## [1]  TRUE FALSE
> c("a","ab","abc")
## [1] "a"   "ab"  "abc"
> 
> # But not combinations, e.g:
> c("a",5,1==2)
## [1] "a"     "5"     "FALSE"
> # Notice that R just turned everything into characters!

3.1.1 Constructing vectors

> # Integers from 9 to 17
> x<-9:17
> x
## [1]  9 10 11 12 13 14 15 16 17
> 
> # A sequence of 11 numbers from 0 to 1
> y<-seq(0,1,length=11)
> y
##  [1] 0.0 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1.0
> 
> # The same number or the same vector several times
> z<-rep(1:2, 5)
> z
##  [1] 1 2 1 2 1 2 1 2 1 2
> 
> # Combine numbers, vectors or both into a new vector
> xz10<-c(x,z,10)
> xz10
##  [1]  9 10 11 12 13 14 15 16 17  1  2  1  2  1  2  1  2  1  2 10

3.1.2 Index and logical index

> # Define a vector with integers from (-5) to 5 and extract the numbers with absolute value less than 3:
> x<- (-5):5
> x
##  [1] -5 -4 -3 -2 -1  0  1  2  3  4  5
> 
> # by their index in the vector:
> x[4:8]
## [1] -2 -1  0  1  2
> 
> # or, by negative selection (set a minus in front of the indices we don't want):
> x[-c(1:3,9:11)]
## [1] -2 -1  0  1  2
> 
> # A logical vector can be defined by:
> index<-abs(x)<3
> index 
##  [1] FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE
> 
> # Now this vector can be used to extract the wanted numbers:
> x[index]
## [1] -2 -1  0  1  2

3.2 Matrix

  • Similar to vectors we can have matrices of objects of the same type, e.g:
> matrix(c(1,2,3,4,5,6)+pi,nrow=2)
##       [,1]  [,2]  [,3]
## [1,] 4.142 6.142 8.142
## [2,] 5.142 7.142 9.142
> 
> matrix(c(1,2,3,4,5,6)+pi,nrow=2)<6
##      [,1]  [,2]  [,3]
## [1,] TRUE FALSE FALSE
## [2,] TRUE FALSE FALSE
> 
> # We can create higher order arrays, e.g:
> array(c(1:24), dim=c(4,3,2))
## , , 1
## 
##      [,1] [,2] [,3]
## [1,]    1    5    9
## [2,]    2    6   10
## [3,]    3    7   11
## [4,]    4    8   12
## 
## , , 2
## 
##      [,1] [,2] [,3]
## [1,]   13   17   21
## [2,]   14   18   22
## [3,]   15   19   23
## [4,]   16   20   24

3.2.1 Constructing matrices

> 
> # Combine rows into a matrix
> A<-rbind(1:3, c(1,1,2))
> A
##      [,1] [,2] [,3]
## [1,]    1    2    3
## [2,]    1    1    2
> 
> # Or columns
> B<-cbind(1:3, c(1,1,2))
> B
##      [,1] [,2]
## [1,]    1    1
## [2,]    2    1
## [3,]    3    2
> 
> # Define a matrix from one long vector
> C<-matrix(c(1,0,0,1,1,0,1,1,1), nrow=3, ncol=3)
> C
##      [,1] [,2] [,3]
## [1,]    1    1    1
## [2,]    0    1    1
## [3,]    0    0    1
> 
> # Can also be done by rows by adding "byrow=TRUE" before the last parenthesis. Try!

3.2.2 Index and logical index

> A<-matrix((-4):5, nrow=2, ncol=5)
> A
##      [,1] [,2] [,3] [,4] [,5]
## [1,]   -4   -2    0    2    4
## [2,]   -3   -1    1    3    5
> 
> 
> # Negative values 
> A[A<0]
## [1] -4 -3 -2 -1
> 
> # Assignments
> A[A<0]<-0
> A
##      [,1] [,2] [,3] [,4] [,5]
## [1,]    0    0    0    2    4
## [2,]    0    0    1    3    5
> 
> # Matrix rows can be selected by
> A[2,]
## [1] 0 0 1 3 5
> 
> # and similarly for columns
> A[,c(2,4)] 
##      [,1] [,2]
## [1,]    0    2
## [2,]    0    3

3.2.3 Properties of vectors and matrices

  • The R function mode(), when applied to a vector or a matrix, returns the type of the elements stored in it:
> A<-matrix(rep(c(TRUE,FALSE),2),nrow=2)
> 
> B<-rnorm(4)
> 
> C<-matrix(LETTERS[1:9],nrow=3)
> 
> A;B;C
##       [,1]  [,2]
## [1,]  TRUE  TRUE
## [2,] FALSE FALSE
## [1]  1.7138  0.8554  0.7930 -1.3228
##      [,1] [,2] [,3]
## [1,] "A"  "D"  "G" 
## [2,] "B"  "E"  "H" 
## [3,] "C"  "F"  "I"
> 
> mode(A); mode(B); mode(C)
## [1] "logical"
## [1] "numeric"
## [1] "character"
  • Vectors and matrices have lengths: the length is the number of elements:
> x<-matrix(c(NA,2:12),ncol=3)
> x
##      [,1] [,2] [,3]
## [1,]   NA    5    9
## [2,]    2    6   10
## [3,]    3    7   11
## [4,]    4    8   12
> 
> length(x[1,])
## [1] 3
> 
> length(x)
## [1] 12
> 
> # The dimension of a matrix is its number of rows and columns; the number of columns is the second element:
> dim(x); dim(x)[2]
## [1] 4 3
## [1] 3

3.2.4 Naming rows and columns in a matrix

  • We can add names to a matrix with the colnames() and rownames() functions:
> x<-matrix(rnorm(12),nrow=4)
> x
##         [,1]    [,2]    [,3]
## [1,]  0.9202 -0.4745 -0.2831
## [2,]  0.1585 -0.2804 -0.6047
## [3,] -1.1698 -1.2134 -0.5080
## [4,] -0.2999 -1.4372  0.8759
> 
> colnames(x)<-paste("data",1:3,sep="")
> 
> rownames(x)<-paste("obs",1:4,sep="")
> 
> x
##        data1   data2   data3
## obs1  0.9202 -0.4745 -0.2831
## obs2  0.1585 -0.2804 -0.6047
## obs3 -1.1698 -1.2134 -0.5080
## obs4 -0.2999 -1.4372  0.8759
> 
> y<-matrix(rnorm(15),nrow=5)
> y
##          [,1]    [,2]    [,3]
## [1,]  0.05725  0.9804 -1.8222
## [2,] -0.99361 -1.4403  1.7824
## [3,]  0.99407  1.7198  1.9227
## [4,]  0.16685  1.4679 -0.7306
## [5,]  0.72052 -0.8524 -1.3746
> 
> colnames(y)<-LETTERS[1:ncol(y)]
> 
> rownames(y)<-letters[1:nrow(y)]
> 
> y
##          A       B       C
## a  0.05725  0.9804 -1.8222
## b -0.99361 -1.4403  1.7824
## c  0.99407  1.7198  1.9227
## d  0.16685  1.4679 -0.7306
## e  0.72052 -0.8524 -1.3746

3.2.5 Matrix multiplication

> M<-matrix(rnorm(20),nrow=4,ncol=5)
> N<-matrix(rnorm(15),nrow=5,ncol=3)
> 
> M%*%N
##         [,1]    [,2]   [,3]
## [1,] -2.9955  0.6733 -1.020
## [2,] -2.4249 -0.4178 -2.198
## [3,]  0.2123 -1.0215 -3.137
## [4,]  1.0913 -0.8827 -1.053
> 
> # Can we compute N%*%M? No! N (5x3) and M (4x5) are not conformable in that order: N has 3 columns but M has 4 rows. Try to run:
> # N%*%M

3.2.6 Additional functions

> M<-matrix(rnorm(16),nrow=4,ncol=4)
> 
> dim(M)
## [1] 4 4
> 
> t(M)
##         [,1]     [,2]    [,3]    [,4]
## [1,] 0.10115 -0.12413  0.9378  1.2825
## [2,] 0.29297  0.02105 -1.0539 -0.8166
## [3,] 0.57005 -0.10685  1.3300  0.4510
## [4,] 0.04554  0.78997  0.2430 -0.5366
> 
> det(M)
## [1] 0.5723
> 
> (invM <- solve(M))
##        [,1]    [,2]     [,3]    [,4]
## [1,] 0.9554  1.1337 -0.79049  1.3921
## [2,] 1.7441  0.5640 -0.89636  0.5725
## [3,] 0.6728 -0.5987  0.60243 -0.5515
## [4,] 0.1947  1.3480 -0.01885  0.1289
> 
> eigen(M)
## eigen() decomposition
## $values
## [1]  1.6905+0.000i -0.5639+0.801i -0.5639-0.801i  0.3528+0.000i
## 
## $vectors
##             [,1]              [,2]              [,3]       [,4]
## [1,] -0.33786+0i -0.219463+0.2577i -0.219463-0.2577i  0.6779+0i
## [2,] -0.08072+0i -0.319630-0.4519i -0.319630+0.4519i  0.6352+0i
## [3,] -0.87322+0i  0.005361-0.3768i  0.005361+0.3768i -0.0564+0i
## [4,] -0.34179+0i  0.661115+0.0000i  0.661115+0.0000i  0.3657+0i

3.3 Simple Functions

  • The above functions are built-in functions. However, it is simple to write your own functions:
> # a square function
> square<-function(x){
+   x*x
+ }
> 
> square(2)
## [1] 4
> square(1:5)
## [1]  1  4  9 16 25
> 
> # a power function with two arguments
> power<-function(x,pow){
+   x^pow
+ }
> 
> power(2,3)
## [1] 8
> power(1:5,3)
## [1]   1   8  27  64 125

3.4 Default arguments

  • A function can have default arguments:
> power<-function(x,pow=2){
+   x^pow
+ }
> 
> power(1:5)
## [1]  1  4  9 16 25
> 
> power(1:5,4)
## [1]   1  16  81 256 625

3.5 Simple plots

> x<-seq(0,6, length=100)
> 
> y<-2*x+3+rnorm(100)
> 
> plot(x,y)

> 
> plot(sin,0,2*pi)