orth {orth}    R Documentation

Logistic Regression For Correlated Binary Data By Way of Orthogonalized Residuals or ALR

Description

Fits mean and association models for correlated binary data based on orthogonalized residuals. The link function for the mean model is the logit link and the link function for the association model is the log-odds ratio.

Usage

orth(formula, data, weights , formula.z, dataz, id, contrasts = NULL, 
     alp.start = NULL, bet.start = NULL, lambda0 = 0, maxiter = 20, tol = 1e-06, 
     estLam = FALSE, monitor = FALSE, ...)

Arguments

formula Two-sided formula for mean model
data Data where the response and the covariates for the mean model are stored
weights Cluster weights
formula.z One-sided formula for the association model
dataz Data where the covariates for the association model are stored
id Cluster id
contrasts model contrasts to be used
alp.start Starting values for association parameters
bet.start Starting values for mean parameters
lambda0 Starting values for lambda
maxiter Maximum number of iterations
tol Convergence tolerance (precision) for the iterative estimation procedure
estLam logical: TRUE to estimate lambda, FALSE otherwise
monitor logical: TRUE to monitor the progress of the estimation, FALSE otherwise
...

Details

If estLam = FALSE then lambda is set to 0 and alternating logistic regression based on orthogonalized residuals is fit to the data.

Value

Returns an S4 object of class "orth" with the following slots

call The call to orth that created the "orth" object
betas The mean parameters – belongs to vector class
alphas The association parameters – belongs to vector class
variance A list containing both the naive and robust covariance matrices
num.iter Number of iterations performed by the procedure
converge Logical: TRUE if the procedure converged
lambda Estimated lambda if estLam=TRUE is specified in orth
score A list containing information about the score vector and Fisher information

Warning

If your data set contains many clusters with cluster sizes of 20 or more,
the estimation routine will take a noticeably long time to finish.

References

R. C. Zink. Correlated Binary Regression Using Orthogonalized Residuals.
PhD thesis, University of North Carolina, Chapel Hill, 2003.

Examples


library(orth)

## EXAMPLE 1:  Six cities study.
##   
## Model formula for the mean:         1 + SMOKE + age
## Model formula for the association : 1 + SMOKE + agediff
##
## The link function for the mean model is the logit link.
## For the association model, we are modelling the log of the pairwise odds ratio 
## within each cluster.  
data(six.city)

## Build the Z-matrix (pairwise covariates) for the association model.
##
## A cluster of size m contributes choose(m, 2) = m(m - 1)/2 rows, one for
## each within-cluster pair (j, k) with j < k, in the order
## (1,2), (1,3), ..., (1,m), (2,3), ..., (m-1,m).  Columns are:
##   id      - the cluster id
##   agediff - absolute age difference between the two observations
##   smoke   - smoke value of the cluster's first observation
##
## NOTE: first[i]:last[i] indexes the i-th cluster only if the rows of
## six.city are grouped by id in the sorted order used by table().
n <- as.vector(table(as.factor(six.city$id)))
last <- cumsum(n)
first <- last - n + 1

## Collect one matrix per cluster and bind them once at the end, instead of
## growing the result with rbind() on every iteration.
z.list <- vector("list", length(n))
for (i in seq_along(n)) {
    n.i <- n[i]

    ## Clusters with fewer than two observations have no pairs, so they
    ## contribute no rows to the association matrix.
    if (n.i < 2) next

    idx.i <- first[i]:last[i]
    age.i <- six.city$age[idx.i]
    smoke.i <- six.city$smoke[idx.i]

    ## 2 x choose(n.i, 2) matrix of pair indices; row 1 is j, row 2 is k.
    pair.jk <- combn(n.i, 2)
    n.pairs <- ncol(pair.jk)

    z.list[[i]] <- cbind(
        rep(six.city$id[first[i]], n.pairs),
        abs(age.i[pair.jk[1, ]] - age.i[pair.jk[2, ]]),
        rep(smoke.i[1], n.pairs)
    )
}

z.6city <- do.call(rbind, z.list)
## If no cluster has a pair, keep a well-formed (empty) three-column result.
if (is.null(z.6city)) {
    z.6city <- matrix(numeric(0), nrow = 0, ncol = 3)
}
z.6city <- data.frame(z.6city)
names(z.6city) <- c("id", "agediff", "smoke")

## Fit the mean and association models to the six cities data, with lambda
## estimated (estLam = TRUE).
orth1 <- orth(
    y ~ age + smoke,
    data = six.city,
    weights = count,
    formula.z = ~ agediff + smoke,
    dataz = z.6city,
    id = id,
    maxiter = 20,
    tol = 0.001,
    estLam = TRUE
)
orth1

o1.s <- summary(orth1)
o1.s

## Refit with the intercept dropped from the mean model; the association
## model stays the same.
orth2 <- update(orth1, . ~ . - 1)

## Cluster-level diagnostics for the six cities fit.
cd <- diag.cls(orth1)

## Cook's D per cluster, based on the robust and then the naive covariance
## matrix.
cook.robust(cd)
cook.naive(cd)

## dbetas and dalphas per cluster.
df.bet(cd)
df.alp(cd)

## Leverage per cluster for the alphas and for the betas.
lev.alp(cd)
lev.bet(cd)

## Observation-level diagnostics.  These are not recommended when many
## clusters have size 7 or more: the routine then takes a very long time to
## finish, so only run it if you are prepared to wait.
od <- diag.obs(orth1)
cook.robust(od)
cook.naive(od)
df.bet(od)
df.alp(od)

#------------------------------------------------------------------------------#
## EXAMPLE 2:  Urinary incontinence study.
##   
## Model formula for the mean: 1 + FEMALE + age + toilet + toilet2 + severe 
##                               + dayacc + dayacc2
## Model formula for the association : sameMD 
##
## The link function for the mean model is the logit link.
## For the association model, we are modelling the log of the pairwise odds ratio 
## within each cluster.
  
data(ui)

## Add the squared terms used in the mean model.
ui[["toilet2"]] <- ui[["toilet"]]^2
ui[["dayacc2"]] <- ui[["dayacc"]]^2

## Build the Z-matrix for the association model.
##
## A practice with m observations contributes choose(m, 2) rows, one for each
## within-practice pair (j, k) with j < k.  Columns are:
##   PracticeID - the practice (cluster) id
##   sameMD     - indicator of whether both observations in the pair share
##                the same doct_id
##
## NOTE: first[i]:last[i] indexes the i-th practice only if the rows of ui
## are grouped by pract_id in the sorted order used by table().
n <- as.vector(table(as.factor(ui$pract_id)))
last <- cumsum(n)
first <- last - n + 1

## Collect one matrix per practice and bind them once at the end, instead of
## growing the result with rbind() on every iteration.
z.list <- vector("list", length(n))
for (i in seq_along(n)) {
    n.i <- n[i]

    ## Practices with fewer than two observations have no pairs, so they
    ## contribute no rows to the association matrix.
    if (n.i < 2) next

    doctrID.i <- ui$doct_id[first[i]:last[i]]

    ## 2 x choose(n.i, 2) matrix of pair indices; row 1 is j, row 2 is k.
    pair.jk <- combn(n.i, 2)

    z.list[[i]] <- cbind(
        rep(ui$pract_id[first[i]], ncol(pair.jk)),
        doctrID.i[pair.jk[1, ]] == doctrID.i[pair.jk[2, ]]
    )
}

z.ui <- do.call(rbind, z.list)
## If no practice has a pair, keep a well-formed (empty) two-column result.
if (is.null(z.ui)) {
    z.ui <- matrix(numeric(0), nrow = 0, ncol = 2)
}
z.ui <- data.frame(z.ui)
names(z.ui) <- c("PracticeID", "sameMD")
z.ui$sameMD <- as.factor(z.ui$sameMD)

## Fit the mean and association models to the urinary incontinence data,
## with lambda estimated (estLam = TRUE).  The association model has no
## intercept, only the sameMD factor.
orth3 <- orth(
    bothered ~ factor(female) + age + toilet + toilet2 + severe + dayacc + dayacc2,
    data = ui,
    formula.z = ~ -1 + sameMD,
    dataz = z.ui,
    id = pract_id,
    maxiter = 55,
    tol = 0.001,
    estLam = TRUE
)

## Several clusters in this data set are very large, so observation-level
## diagnostics are not recommended here.  Cluster-level diagnostics are
## fast to compute.
cd <- diag.cls(orth3)
df.bet(cd)
df.alp(cd)
lev.bet(cd)
lev.alp(cd)
cook.robust(cd)
cook.naive(cd)


#------------------------------------------------------------------------------#
## EXAMPLE 3:  Coalminer data from McCullagh and Nelder (1989).

## Model formula for the marginal mean:  1 + age
##
## Model formula for the marginal association : 1 + age
##
## The link for the mean model is the LOGIT link.  For the association, we
## are modelling the pairwise log-odds ratio of an event.

data(coalminer)

## Build the data for the association model: one row per cluster, holding the
## cluster id and the centred age of the cluster's first observation.
##
## NOTE: first[i] is the first row of the i-th cluster only if the rows of
## coalminer are grouped by id in the sorted order used by table().
n <- as.vector(table(as.factor(coalminer$id)))
last <- cumsum(n)
first <- last - n + 1

## Indexing with the whole vector of first-row positions replaces an
## element-by-element copy loop and is safe when there are no clusters.
age.i <- coalminer$ageCntrd[first]

z.coalminer <- data.frame(id = as.vector(unique(coalminer$id)), age = age.i)

## Fit the mean and association models to the coalminer data.  Lambda is not
## estimated (estLam = FALSE).
orth4 <- orth(
    y ~ -1 + factor(wheeze) + factor(wheeze):ageCntrd,
    data = coalminer,
    weights = count,
    formula.z = ~ age,
    dataz = z.coalminer,
    id = id,
    maxiter = 10,
    tol = 0.001,
    estLam = FALSE
)

## Estimates and standard errors based on the robust covariance
summary(orth4)

## Estimates and standard errors based on the model-based covariance
summary(summary(orth4))

## Cluster-level diagnostics for the coalminer fit.
cd <- diag.cls(orth4)
df.bet(cd)
df.alp(cd)
lev.bet(cd)
lev.alp(cd)
cook.robust(cd)
cook.naive(cd)

#------------------------------------------------------------------------------#


[Package orth version 1.5 Index]