R : Copyright 2005, The R Foundation for Statistical Computing Version 2.1.1 (2005-06-20), ISBN 3-900051-07-0 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for a HTML browser interface to help. Type 'q()' to quit R. > ### *
> ### > attach(NULL, name = "CheckExEnv") > assign(".CheckExEnv", as.environment(2), pos = length(search())) # base > ## add some hooks to label plot pages for base and grid graphics > setHook("plot.new", ".newplot.hook") > setHook("persp", ".newplot.hook") > setHook("grid.newpage", ".gridplot.hook") > > assign("cleanEx", + function(env = .GlobalEnv) { + rm(list = ls(envir = env, all.names = TRUE), envir = env) + RNGkind("default", "default") + set.seed(1) + options(warn = 1) + delayedAssign("T", stop("T used instead of TRUE"), + assign.env = .CheckExEnv) + delayedAssign("F", stop("F used instead of FALSE"), + assign.env = .CheckExEnv) + sch <- search() + newitems <- sch[! sch %in% .oldSearch] + for(item in rev(newitems)) + eval(substitute(detach(item), list(item=item))) + missitems <- .oldSearch[! .oldSearch %in% sch] + if(length(missitems)) + warning("items ", paste(missitems, collapse=", "), + " have been removed from the search path") + }, + env = .CheckExEnv) > assign("..nameEx", "__{must remake R-ex/*.R}__", env = .CheckExEnv) # for now > assign("ptime", proc.time(), env = .CheckExEnv) > grDevices::postscript("Design-Examples.ps") > assign("par.postscript", graphics::par(no.readonly = TRUE), env = .CheckExEnv) > options(contrasts = c(unordered = "contr.treatment", ordered = "contr.poly")) > options(warn = 1) > library('Design') Loading required package: Hmisc Hmisc library by Frank E Harrell Jr Type library(help='Hmisc'), ?Overview, or ?Hmisc.Overview') to see overall documentation. NOTE:Hmisc no longer redefines [.factor to drop unused levels when subsetting. To get the old behavior of Hmisc type dropUnusedLevels(). Attaching package: 'Hmisc' The following object(s) are masked from package:stats : ecdf Loading required package: survival Loading required package: splines Attaching package: 'survival' The following object(s) are masked from package:Hmisc : untangle.specials Design library by Frank E Harrell Jr Type library(help='Design'), ?Overview, or ?Design.Overview') to see overall documentation. Attaching package: 'Design' The following object(s) are masked from package:survival : Surv cox.zph survfit The following object(s) are masked from package:Hmisc : .R. .SV4. .noGenenerics > > assign(".oldSearch", search(), env = .CheckExEnv) > assign(".oldNS", loadedNamespaces(), env = .CheckExEnv) > cleanEx(); ..nameEx <- "Design.Misc" > > ### * Design.Misc > > flush(stderr()); flush(stdout()) > > ### Name: Design.Misc > ### Title: Miscellaneous Design Attributes and Utility Functions > ### Aliases: Design.Misc Varcov.cph Varcov.glmD Varcov.glsD Varcov.lrm > ### Varcov.ols Varcov.psm oos.loglik oos.loglik.ols oos.loglik.lrm > ### oos.loglik.cph oos.loglik.psm oos.loglik.glmD num.intercepts Getlim > ### Getlimi related.predictors interactions.containing param.order > ### Penalty.matrix Penalty.setup lrtest univarLR Newlabels Newlevels > ### Newlabels.Design Newlevels.Design DesignFit print.Design > ### residuals.Design print.lrtest > ### Keywords: models methods > > ### ** Examples > > ## Not run: > ##D f <- psm(S ~ x1 + x2 + sex + race, dist='gau') > ##D g <- psm(S ~ x1 + sex + race, dist='gau', > ##D fixed=list(scale=exp(f$parms))) > ##D lrtest(f, g) > ##D > ##D g <- Newlabels(f, c(x2='Label for x2')) > ##D g <- Newlevels(g, list(sex=c('Male','Female'),race=c('B','W'))) > ##D nomogram(g) > ## End(Not run) > > > > cleanEx(); ..nameEx <- "Design" > > ### * Design > > flush(stderr()); flush(stdout()) > > ### Name: Design > ### Title: Design Methods and Generic Functions > ### Aliases: Design > ### Keywords: models regression survival math manip methods > > ### ** Examples > > ## Not run: > ##D library(Design, first=TRUE) # omit first for R > ##D dist <- datadist(data=2) # can omit if not using summary, plot, survplot, > ##D # or if specify all variable values to them. Can > ##D # also defer. data=2: get distribution summaries > ##D # for all variables in search position 2 > ##D # run datadist once, for all candidate variables > ##D dist <- datadist(age,race,bp,sex,height) # alternative > ##D options(datadist="dist") > ##D f <- cph(Surv(d.time, death) ~ rcs(age,4)*strat(race) + > ##D bp*strat(sex)+lsp(height,60),x=TRUE,y=TRUE) > ##D anova(f) > ##D anova(f,age,height) # Joint test of 2 vars > ##D fastbw(f) > ##D summary(f, sex="female") # Adjust sex to "female" when testing > ##D # interacting factor bp > ##D plot(f, age=NA, height=NA) # 3-D plot > ##D plot(f, age=10:70, height=60) > ##D latex(f) # LaTeX representation of fit > ##D > ##D f <- lm(y ~ x) # Can use with any fitting function that > ##D # calls model.frame.default, e.g. lm, glm > ##D specs.Design(f) # Use .Design since class(f)="lm" > ##D anova(f) # Works since Varcov(f) (=Varcov.lm(f)) works > ##D fastbw(f) > ##D options(datadist=NULL) > ##D f <- ols(y ~ x1*x2) # Saves enough information to do fastbw, anova > ##D anova(f) # Will not do plot.Design since distributions > ##D fastbw(f) # of predictors not saved > ##D plot(f, x1=seq(100,300,by=.5), x2=.5) > ##D # all values defined - don't need datadist > ##D dist <- datadist(x1,x2) # Equivalent to datadist(f) > ##D options(datadist="dist") > ##D plot(f, x1=NA, x2=.5) # Now you can do plot, summary > ##D nomogram(f, interact=list(x2=c(.2,.7))) > ## End(Not run) > > > > cleanEx(); ..nameEx <- "Design.trans" > > ### * Design.trans > > flush(stderr()); flush(stdout()) > > ### Name: Design.trans > ### Title: Design Special Transformation Functions > ### Aliases: Design.trans asis pol lsp rcs catg scored strat matrx \%ia\% > ### Keywords: models regression math manip methods survival smooth > > ### ** Examples > > ## Not run: > ##D options(knots=4, poly.degree=2) > ##D country <- factor(country.codes) > ##D blood.pressure <- cbind(sbp=systolic.bp, dbp=diastolic.bp) > ##D fit <- lrm(Y ~ sqrt(x1)*rcs(x2) + rcs(x3,c(5,10,15)) + > ##D lsp(x4,c(10,20)) + country + blood.pressure + poly(age,2)) > ##D # sqrt(x1) is an implicit asis variable, but limits of x1, not sqrt(x1) > ##D # are used for later plotting and effect estimation > ##D # x2 fitted with restricted cubic spline with 4 default knots > ##D # x3 fitted with r.c.s. with 3 specified knots > ##D # x4 fitted with linear spline with 2 specified knots > ##D # country is an implied catg variable > ##D # blood.pressure is an implied matrx variable > ##D # since poly is not a Design function (pol is), it creates a > ##D # matrx type variable with no automatic linearity testing > ##D # or plotting > ##D f1 <- lrm(y ~ rcs(x1) + rcs(x2) + rcs(x1) %ia% rcs(x2)) > ##D # %ia% restricts interactions. Here it removes terms nonlinear in > ##D # both x1 and x2 > ##D f2 <- lrm(y ~ rcs(x1) + rcs(x2) + x1 %ia% rcs(x2)) > ##D # interaction linear in x1 > ##D f3 <- lrm(y ~ rcs(x1) + rcs(x2) + x1 %ia% x2) > ##D # simple product interaction (doubly linear) > ##D # Use x1 %ia% x2 instead of x1:x2 because x1 %ia% x2 triggers > ##D # anova to pool x1*x2 term into x1 terms to test total effect > ##D # of x1 > ## End(Not run) > > > > cleanEx(); ..nameEx <- "Function" > > ### * Function > > flush(stderr()); flush(stdout()) > > ### Name: Function > ### Title: Compose an S Function to Compute X beta from a Fit > ### Aliases: Function.Design Function.cph sascode > ### Keywords: regression methods interface models survival math > > ### ** Examples > > set.seed(1331) > x1 <- exp(rnorm(100)) > x2 <- factor(sample(c('a','b'),100,rep=TRUE)) > dd <- datadist(x1, x2) > options(datadist='dd') > y <- log(x1)^2+log(x1)*(x2=='b')+rnorm(100)/4 > f <- ols(y ~ pol(log(x1),2)*x2) > f$coef Intercept x1 x1^2 x2=b x1 * x2=b x1^2 * x2=b 0.050809859 0.012335335 0.991077315 -0.049218199 1.001128354 -0.003440629 > g <- Function(f, digits=5) > g function (x1 = 1.0906, x2 = "a") { x1 <- log(x1) 0.05081 + 0.012335 * x1 + 0.99108 * x1^2 - 0.049218 * (x2 == "b") + (x2 == "b") * (1.0011 * x1 - 0.0034406 * x1^2) } > sascode(g) x1 = log(x1) 0.05081 + 0.012335 * x1 + 0.99108 * x1**2 - 0.049218 * (x2 = "b") + (x2 = "b") * (1.0011 * x1 - 0.0034406 * x1**2); > g() [1] 0.05933444 > g(x1=c(2,3), x2='b') #could omit x2 since b is default category [1] 1.178566 2.306994 > predict(f, expand.grid(x1=c(2,3),x2='b')) 1 2 1.178584 2.307022 > g8 <- Function(f) # default is 8 sig. digits > g8(x1=c(2,3), x2='b') [1] 1.178584 2.307022 > options(datadist=NULL) > > ## Not run: > ##D # Make self-contained functions for computing survival probabilities > ##D # using a log-normal regression > ##D f <- psm(Surv(d.time, death) ~ rcs(age,4)*sex, dist='gaussian') > ##D g <- Function(f) > ##D surv <- Survival(f) > ##D # Compute 2 and 5-year survival estimates for 50 year old male > ##D surv(c(2,5), g(age=50, sex='male')) > ## End(Not run) > > > > cleanEx(); ..nameEx <- "Overview" > > ### * Overview > > flush(stderr()); flush(stdout()) > > ### Name: Overview > ### Title: Overview of Design Library > ### Aliases: Overview Design.Overview > ### Keywords: models > > ### ** Examples > > ###################### > # Detailed Example 1 # > ###################### > # May want to first invoke the Hmisc store function > # so that new variables will go into a temporary directory > set.seed(17) # So can repeat random number sequence > n <- 500 > > sex <- factor(sample(c('female','male'), n, rep=TRUE)) > age <- rnorm(n, 50, 10) > sys.bp <- rnorm(n, 120, 7) > > # Use two population models, one with a systolic > # blood pressure effect and one without > > L <- ifelse(sex=='female', .1*(pmin(age,50)-50), .005*(age-50)^2) > L.bp <- L + .4*(pmax(sys.bp,120)-120) > > dz <- ifelse(runif(n) <= plogis(L), 1, 0) > dz.bp <- ifelse(runif(n) <= plogis(L.bp), 1, 0) > > # Use summary.formula in the Hmisc library to summarize the > # data one predictor at a time > > s <- summary(dz.bp ~ age + sex + sys.bp) > options(digits=3) > print(s) dz.bp N=500 +-------+-----------+---+-----+ | | |N |dz.bp| +-------+-----------+---+-----+ |age |[15.1,43.6)|125|0.648| | |[43.6,50.3)|125|0.688| | |[50.3,56.7)|125|0.616| | |[56.7,89.4]|125|0.760| +-------+-----------+---+-----+ |sex |female |240|0.596| | |male |260|0.754| +-------+-----------+---+-----+ |sys.bp |[102,115) |125|0.512| | |[115,120) |125|0.480| | |[120,125) |125|0.744| | |[125,141] |125|0.976| +-------+-----------+---+-----+ |Overall| |500|0.678| +-------+-----------+---+-----+ > plot(s) > > plsmo(age, dz, group=sex, fun=qlogis, ylim=c(-3,3)) > plsmo(age, L, group=sex, method='raw', add=TRUE, prefix='True', trim=0) > title('Lowess-smoothed Estimates with True Regression Functions') > > dd <- datadist(age, sex, sys.bp) > options(datadist='dd') > # can also do: dd <- datadist(dd, newvar) > > f <- lrm(dz ~ rcs(age,5)*sex, x=TRUE, y=TRUE) > f Logistic Regression Model lrm(formula = dz ~ rcs(age, 5) * sex, x = TRUE, y = TRUE) Frequencies of Responses 0 1 235 265 Obs Max Deriv Model L.R. d.f. P C Dxy 500 1e-07 50.8 9 0 0.663 0.327 Gamma Tau-a R2 Brier 0.329 0.163 0.129 0.226 Coef S.E. Wald Z P Intercept -7.9925 3.9390 -2.03 0.0425 age 0.1810 0.1029 1.76 0.0786 age' -0.3458 0.4323 -0.80 0.4238 age'' 1.5460 2.2145 0.70 0.4851 age''' -2.6579 3.3658 -0.79 0.4297 sex=male 14.0546 4.6913 3.00 0.0027 age * sex=male -0.3335 0.1228 -2.72 0.0066 age' * sex=male 0.8253 0.5303 1.56 0.1197 age'' * sex=male -2.8522 2.8360 -1.01 0.3146 age''' * sex=male 3.3676 4.5642 0.74 0.4606 > # x=TRUE, y=TRUE for pentrace > > fpred <- Function(f) > fpred function (age = 50.310462, sex = "male") { -7.9925102 + 0.18097811 * age - 0.00032804104 * pmax(age - 33.874461, 0)^3 + 0.0014667311 * pmax(age - 44.433245, 0)^3 - 0.0025216207 * pmax(age - 50.310462, 0)^3 + 0.0018029552 * pmax(age - 55.835572, 0)^3 - 0.00042002458 * pmax(age - 66.340264, 0)^3 + 14.054577 * (sex == "male") + (sex == "male") * (-0.33352947 * age + 0.00078298646 * pmax(age - 33.874461, 0)^3 - 0.0027059801 * pmax(age - 44.433245, 0)^3 + 0.0031949484 * pmax(age - 50.310462, 0)^3 - 0.0016520917 * pmax(age - 55.835572, 0)^3 + 0.00038013702 * pmax(age - 66.340264, 0)^3) } > fpred(age=30, sex=levels(sex)) [1] -2.56 1.49 > > anova(f) Wald Statistics Response: dz Factor Chi-Square d.f. P age (Factor+Higher Order Factors) 30.2 8 0.0002 All Interactions 16.6 4 0.0024 Nonlinear (Factor+Higher Order Factors) 23.3 6 0.0007 sex (Factor+Higher Order Factors) 25.6 5 0.0001 All Interactions 16.6 4 0.0024 age * sex (Factor+Higher Order Factors) 16.6 4 0.0024 Nonlinear 16.0 3 0.0011 Nonlinear Interaction : f(A,B) vs. AB 16.0 3 0.0011 TOTAL NONLINEAR 23.3 6 0.0007 TOTAL NONLINEAR + INTERACTION 23.5 7 0.0014 TOTAL 37.2 9 <.0001 > > p <- plot(f, age=NA, sex=NA, conf.int=FALSE, ylim=c(-3,3)) > datadensity(p, age, sex) > scat1d(age) > > plsmo(age, L, group=sex, method='raw', add=TRUE, prefix='True', trim=0) > title('Spline Fits with True Regression Functions') > > f.bp <- lrm(dz.bp ~ rcs(age,5)*sex + rcs(sys.bp,5)) > > for(method in c('persp','image')) + p <- plot(f.bp, age=NA, sys.bp=NA, method=method) > # Legend(p) # NOTE: Needs subplot - not in R > > cat('Doing 25 bootstrap repetitions to validate model\n') Doing 25 bootstrap repetitions to validate model > validate(f, B=25) # in practice try to use 150 Iteration: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 index.orig training test optimism index.corrected n Dxy 0.3272 0.336 0.30141 0.03445 0.29272 25 R2 0.1290 0.142 0.11279 0.02953 0.09944 25 Intercept 0.0000 0.000 0.00886 -0.00886 0.00886 25 Slope 1.0000 1.000 0.86917 0.13083 0.86917 25 Emax 0.0000 0.000 0.03274 0.03274 0.03274 25 D 0.0996 0.111 0.08629 0.02470 0.07490 25 U -0.0040 -0.004 0.00186 -0.00586 0.00186 25 Q 0.1036 0.115 0.08444 0.03056 0.07305 25 B 0.2258 0.223 0.22953 -0.00637 0.23221 25 > > cat('Doing 25 bootstrap reps to check model calibration\n') Doing 25 bootstrap reps to check model calibration > cal <- calibrate(f, B=25) # use 150 in practice Iteration: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 > plot(cal) n=500 Mean absolute error=0.0185 Mean squared error=0.000536 0.9 Quantile of absolute error=0.0319 > title('Calibration of Unpenalized Model') > > p <- if(.R.) pentrace(f, penalty=c(.009,.009903,.02,.2,.5,1)) else + pentrace(f, penalty=1, method='optimize') > > f <- update(f, penalty=p$penalty) > f Logistic Regression Model lrm(formula = dz ~ rcs(age, 5) * sex, x = TRUE, y = TRUE, penalty = p$penalty) Frequencies of Responses 0 1 235 265 Penalty factors: simple nonlinear interaction nonlinear.interaction 0.02 0.02 0.02 0.02 Final penalty on -2 log L: 1.71 Obs Max Deriv Model L.R. d.f. P C Dxy 500 2e-05 49.8 7.95 0 0.664 0.327 Gamma Tau-a R2 Brier 0.329 0.163 0.122 0.226 Coef S.E. Wald Z P Penalty Scale Intercept -4.940899 2.47309 -2.00 0.0457 0.0000 age 0.099822 0.06297 1.59 0.1129 1.4410 age' -0.008624 0.24123 -0.04 0.9715 1.4759 age'' -0.049210 1.25333 -0.04 0.9687 0.4255 age''' -0.520005 2.06692 -0.25 0.8014 0.1320 sex=male 9.646487 2.96111 3.26 0.0011 0.1000 age * sex=male -0.216102 0.07559 -2.86 0.0042 3.6160 age' * sex=male 0.328440 0.30085 1.09 0.2750 1.0759 age'' * sex=male -0.454827 1.68334 -0.27 0.7870 0.2820 age''' * sex=male 0.077775 3.01079 0.03 0.9794 0.0841 > specs(f,long=TRUE) lrm(formula = dz ~ rcs(age, 5) * sex, x = TRUE, y = TRUE, penalty = p$penalty) Assumption Parameters d.f. age rcspline 33.874 44.433 50.31 55.836 66.34 4 sex category female male 1 age * sex interaction nonlinear x linear - f(A)B 4 age sex Low:effect 43.6 Adjust to 50.3 male High:effect 56.6 Low:prediction 30.4 female High:prediction 72.4 male Low 15.1 female High 89.4 male > edf <- effective.df(f) Original and Effective Degrees of Freedom Original Penalized All 9 7.95 Simple Terms 2 1.90 Interaction or Nonlinear 7 6.05 Nonlinear 6 5.12 Interaction 4 3.53 Nonlinear Interaction 3 2.61 > > p <- plot(f, age=NA, sex=NA, conf.int=FALSE, ylim=c(-3,3)) > datadensity(p, age, sex) > scat1d(age) > > plsmo(age, L, group=sex, method='raw', add=TRUE, prefix='True', trim=0) > title('Penalized Spline Fits with True Regression Functions') > > options(digits=3) > s <- summary(f) > s Effects Response : dz Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95 age 43.6 56.6 13.0 0.80 0.35 0.12 1.49 Odds Ratio 43.6 56.6 13.0 2.23 NA 1.12 4.42 sex - female:male 2.0 1.0 NA -0.07 0.31 -0.67 0.53 Odds Ratio 2.0 1.0 NA 0.93 NA 0.51 1.70 Adjusted to: age=50.3 sex=male > plot(s) > > s <- summary(f, sex='male') > plot(s) > > fpred <- Function(f) > fpred function (age = 50.310462, sex = "male") { -4.9408986 + 0.099821811 * age - 8.1818002e-06 * pmax(age - 33.874461, 0)^3 - 4.6687755e-05 * pmax(age - 44.433245, 0)^3 - 0.00049334968 * pmax(age - 50.310462, 0)^3 + 0.0008754865 * pmax(age - 55.835572, 0)^3 - 0.00032726726 * pmax(age - 66.340264, 0)^3 + 9.6464867 * (sex == "male") + (sex == "male") * (-0.21610185 * age + 0.00031160482 * pmax(age - 33.874461, 0)^3 - 0.00043151337 * pmax(age - 44.433245, 0)^3 + 7.3788465e-05 * pmax(age - 50.310462, 0)^3 - 0.00017574468 * pmax(age - 55.835572, 0)^3 + 0.00022186476 * pmax(age - 66.340264, 0)^3) } > fpred(age=30, sex=levels(sex)) [1] -1.95 1.22 > sascode(fpred) -4.9408986 + 0.099821811 * age - 8.1818002e-06 * max(age - 33.874461, 0)**3 - 4.6687755e-05 * max(age - 44.433245, 0)**3 - 0.00049334968 * max(age - 50.310462, 0)**3 + 0.0008754865 * max(age - 55.835572, 0)**3 - 0.00032726726 * max(age - 66.340264, 0)**3 + 9.6464867 * (sex = "male") + (sex = "male") * (-0.21610185 * age + 0.00031160482 * max(age - 33.874461, 0)**3 - 0.00043151337 * max(age - 44.433245, 0)**3 + 7.3788465e-05 * max(age - 50.310462, 0)**3 - 0.00017574468 * max(age - 55.835572, 0)**3 + 0.00022186476 * max(age - 66.340264, 0)**3); > > cat('Doing 40 bootstrap reps to validate penalized model\n') Doing 40 bootstrap reps to validate penalized model > validate(f, B=40) Iteration: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 index.orig training test optimism index.corrected n Dxy 0.3273 0.339 0.29923 0.03963 0.28764 40 R2 0.1224 0.139 0.11053 0.02883 0.09356 40 Intercept 0.0000 0.000 0.00409 -0.00409 0.00409 40 Slope 1.0000 1.000 0.89841 0.10159 0.89841 40 Emax 0.0000 0.000 0.02459 0.02459 0.02459 40 D 0.0976 0.108 0.08447 0.02401 0.07356 40 U -0.0040 -0.004 0.00200 -0.00600 0.00200 40 Q 0.1016 0.112 0.08247 0.03001 0.07156 40 B 0.2263 0.222 0.23027 -0.00798 0.23425 40 > > cat('Doing 40 bootstrap reps to check penalized model calibration\n') Doing 40 bootstrap reps to check penalized model calibration > cal <- calibrate(f, B=40) Iteration: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 > plot(cal) n=500 Mean absolute error=0.0170 Mean squared error=0.000442 0.9 Quantile of absolute error=0.0287 > title('Calibration of Penalized Model') > > nomogram(f.bp, fun=plogis, + funlabel='Prob(dz)', + fun.at=c(.15,.2,.3,.4,.5,.6,.7,.8,.9,.95,.975), + fun.side=c(1,3,1,3,1,3,1,3,1,3,1)) > options(datadist=NULL) > > ##################### > #Detailed Example 2 # > ##################### > # Simulate the data. > n <- 1000 # define sample size > set.seed(17) # so can reproduce the results > treat <- factor(sample(c('a','b','c'), n, TRUE)) > num.diseases <- sample(0:4, n, TRUE) > age <- rnorm(n, 50, 10) > cholesterol <- rnorm(n, 200, 25) > weight <- rnorm(n, 150, 20) > sex <- factor(sample(c('female','male'), n, TRUE)) > label(age) <- 'Age' # label is in Hmisc > label(num.diseases) <- 'Number of Comorbid Diseases' > label(cholesterol) <- 'Total Cholesterol' > label(weight) <- 'Weight, lbs.' > label(sex) <- 'Sex' > units(cholesterol) <- 'mg/dl' # uses units.default in Hmisc > > # Specify population model for log odds that Y=1 > L <- .1*(num.diseases-2) + .045*(age-50) + + (log(cholesterol - 10)-5.2)*(-2*(treat=='a') + + 3.5*(treat=='b')+2*(treat=='c')) > # Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)] > y <- ifelse(runif(n) < plogis(L), 1, 0) > cholesterol[1:3] <- NA # 3 missings, at random > > ddist <- datadist(cholesterol, treat, num.diseases, + age, weight, sex) > # Could have used ddist <- datadist(data.frame.name) > options(datadist="ddist") # defines data dist. to Design > cholesterol <- impute(cholesterol) # see impute in Hmisc library > # impute, describe, and several other basic functions are > # distributed as part of the Hmisc library > > fit <- lrm(y ~ treat*log(cholesterol - 10) + + scored(num.diseases) + rcs(age)) > > describe(y ~ treat + scored(num.diseases) + rcs(age)) y ~ treat + scored(num.diseases) + rcs(age) 4 Variables 1000 Observations --------------------------------------------------------------------------- y n missing unique Sum Mean 1000 0 2 522 0.522 --------------------------------------------------------------------------- treat n missing unique 1000 0 3 a (352, 35%), b (323, 32%), c (325, 32%) --------------------------------------------------------------------------- scored(num.diseases) : Number of Comorbid Diseases n missing unique 1000 0 5 0 1 2 3 4 Frequency 187 197 221 231 164 % 19 20 22 23 16 --------------------------------------------------------------------------- age : Age n missing unique Mean .05 .10 .25 .50 .75 .90 1000 0 1000 49.93 33.04 37.05 43.06 49.56 57.05 63.12 .95 65.92 lowest : 21.43 22.64 23.60 23.89 25.13, highest: 74.64 74.90 77.43 77.74 80.32 --------------------------------------------------------------------------- age' : Age n missing unique Mean .05 .10 .25 .50 1000 0 951 8.578 4.214e-12 5.953e-02 9.286e-01 4.169e+00 .75 .90 .95 1.280e+01 2.415e+01 3.005e+01 lowest : 0.000e+00 4.436e-12 1.637e-06 8.878e-06 1.872e-05 highest: 48.54 49.09 54.44 55.10 60.57 --------------------------------------------------------------------------- age'' : Age n missing unique Mean .05 .10 .25 .50 .75 .90 1000 0 726 1.702 0.0000 0.0000 0.0000 0.1735 2.1325 5.9507 .95 8.0651 lowest : 0.000e+00 1.347e-11 2.236e-09 1.647e-08 6.044e-07 highest: 14.71 14.91 16.84 17.07 19.04 --------------------------------------------------------------------------- age''' : Age n missing unique Mean .05 .10 .25 .50 1000 0 501 0.4724 0.000e+00 0.000e+00 0.000e+00 3.646e-10 .75 .90 .95 3.877e-01 1.802e+00 2.644e+00 lowest : 0.000e+00 7.291e-10 9.830e-07 1.677e-06 2.905e-06 highest: 5.304 5.384 6.154 6.248 7.035 --------------------------------------------------------------------------- > # or use describe(formula(fit)) for all variables used in fit > # describe function (in Hmisc) gets simple statistics on variables > #fit <- robcov(fit) # Would make all statistics which follow > # use a robust covariance matrix > # would need x=TRUE, y=TRUE in lrm > specs(fit) # Describe the design characteristics lrm(formula = y ~ treat * log(cholesterol - 10) + scored(num.diseases) + rcs(age)) Units Label Transformation treat treat cholesterol mg/dl Total Cholesterol log(cholesterol - 10) num.diseases Number of Comorbid Diseases age Age treat * cholesterol treat * cholesterol Assumption Parameters d.f. treat category a b c 2 cholesterol asis 1 num.diseases scored 0 1 2 3 4 4 age rcspline 33.043 43.838 49.562 56.28 65.924 4 treat * cholesterol interaction linear x linear - AB 2 > a <- anova(fit) > print(a, which='subscripts') # print which parameters being tested Wald Statistics Response: y Factor Chi-Square d.f. P treat (Factor+Higher Order Factors) 15.86 4 0.0032 All Interactions 10.77 2 0.0046 cholesterol (Factor+Higher Order Factors) 12.76 3 0.0052 All Interactions 10.77 2 0.0046 num.diseases 14.92 4 0.0049 Nonlinear 0.73 3 0.8662 age 67.91 4 <.0001 Nonlinear 1.11 3 0.7736 treat * cholesterol (Factor+Higher Order Factors) 10.77 2 0.0046 TOTAL NONLINEAR 2.03 6 0.9167 TOTAL NONLINEAR + INTERACTION 13.19 8 0.1056 TOTAL 90.62 13 <.0001 Tested 1-2,12-13 12-13 3,12-13 12-13 4-7 5-7 8-11 9-11 12-13 5-7,9-11 5-7,9-13 1-13 Subscripts correspond to: [1] treat=b treat=c cholesterol [4] num.diseases num.diseases=2 num.diseases=3 [7] num.diseases=4 age age' [10] age'' age''' treat=b * cholesterol [13] treat=c * cholesterol > plot(anova(fit)) # Depict Wald statistics graphically > anova(fit, treat, cholesterol) # Test these 2 by themselves Wald Statistics Response: y Factor Chi-Square d.f. P treat (Factor+Higher Order Factors) 15.9 4 0.0032 All Interactions 10.8 2 0.0046 cholesterol (Factor+Higher Order Factors) 12.8 3 0.0052 All Interactions 10.8 2 0.0046 TOTAL 17.7 5 0.0034 > summary(fit) # Estimate effects using default ranges Effects Response : y Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95 cholesterol 183.8 217 32.9 -0.24 0.15 -0.54 0.06 Odds Ratio 183.8 217 32.9 0.79 NA 0.58 1.06 age 43.1 57 14.0 0.84 0.18 0.48 1.20 Odds Ratio 43.1 57 14.0 2.31 NA 1.61 3.30 treat - b:a 1.0 2 NA 0.40 0.16 0.08 0.73 Odds Ratio 1.0 2 NA 1.50 NA 1.09 2.07 treat - c:a 1.0 3 NA 0.20 0.16 -0.11 0.52 Odds Ratio 1.0 3 NA 1.23 NA 0.89 1.69 num.diseases - 0:2 3.0 1 NA -0.47 0.21 -0.89 -0.06 Odds Ratio 3.0 1 NA 0.62 NA 0.41 0.94 num.diseases - 1:2 3.0 2 NA -0.35 0.21 -0.76 0.05 Odds Ratio 3.0 2 NA 0.70 NA 0.47 1.06 num.diseases - 3:2 3.0 4 NA 0.08 0.20 -0.31 0.47 Odds Ratio 3.0 4 NA 1.08 NA 0.73 1.60 num.diseases - 4:2 3.0 5 NA 0.24 0.22 -0.20 0.67 Odds Ratio 3.0 5 NA 1.27 NA 0.82 1.96 Adjusted to: treat=a cholesterol=200 > plot(summary(fit)) # Graphical display of effects with C.L. > summary(fit, treat="b", age=60) Effects Response : y Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95 cholesterol 183.8 217 32.9 0.50 0.17 0.17 0.83 Odds Ratio 183.8 217 32.9 1.65 NA 1.18 2.30 age 43.1 57 14.0 0.84 0.18 0.48 1.20 Odds Ratio 43.1 57 14.0 2.31 NA 1.61 3.30 treat - a:b 2.0 1 NA -0.40 0.16 -0.73 -0.08 Odds Ratio 2.0 1 NA 0.67 NA 0.48 0.92 treat - c:b 2.0 3 NA -0.20 0.17 -0.53 0.13 Odds Ratio 2.0 3 NA 0.82 NA 0.59 1.14 num.diseases - 0:2 3.0 1 NA -0.47 0.21 -0.89 -0.06 Odds Ratio 3.0 1 NA 0.62 NA 0.41 0.94 num.diseases - 1:2 3.0 2 NA -0.35 0.21 -0.76 0.05 Odds Ratio 3.0 2 NA 0.70 NA 0.47 1.06 num.diseases - 3:2 3.0 4 NA 0.08 0.20 -0.31 0.47 Odds Ratio 3.0 4 NA 1.08 NA 0.73 1.60 num.diseases - 4:2 3.0 5 NA 0.24 0.22 -0.20 0.67 Odds Ratio 3.0 5 NA 1.27 NA 0.82 1.96 Adjusted to: treat=b cholesterol=200 > # Specify reference cell and adjustment val > > summary(fit, age=c(50,70)) # Estimate effect of increasing age from Effects Response : y Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95 cholesterol 184 217 32.9 -0.24 0.15 -0.54 0.06 Odds Ratio 184 217 32.9 0.79 NA 0.58 1.06 age 50 70 20.0 1.18 0.32 0.54 1.81 Odds Ratio 50 70 20.0 3.25 NA 1.72 6.13 treat - b:a 1 2 NA 0.40 0.16 0.08 0.73 Odds Ratio 1 2 NA 1.50 NA 1.09 2.07 treat - c:a 1 3 NA 0.20 0.16 -0.11 0.52 Odds Ratio 1 3 NA 1.23 NA 0.89 1.69 num.diseases - 0:2 3 1 NA -0.47 0.21 -0.89 -0.06 Odds Ratio 3 1 NA 0.62 NA 0.41 0.94 num.diseases - 1:2 3 2 NA -0.35 0.21 -0.76 0.05 Odds Ratio 3 2 NA 0.70 NA 0.47 1.06 num.diseases - 3:2 3 4 NA 0.08 0.20 -0.31 0.47 Odds Ratio 3 4 NA 1.08 NA 0.73 1.60 num.diseases - 4:2 3 5 NA 0.24 0.22 -0.20 0.67 Odds Ratio 3 5 NA 1.27 NA 0.82 1.96 Adjusted to: treat=a cholesterol=200 > # 50 to 70 > summary(fit, age=c(50,60,70)) # Increase age from 50 to 70, Effects Response : y Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95 cholesterol 184 217 32.9 -0.24 0.15 -0.54 0.06 Odds Ratio 184 217 32.9 0.79 NA 0.58 1.06 age 50 70 20.0 1.18 0.32 0.54 1.81 Odds Ratio 50 70 20.0 3.25 NA 1.72 6.13 treat - b:a 1 2 NA 0.40 0.16 0.08 0.73 Odds Ratio 1 2 NA 1.50 NA 1.09 2.07 treat - c:a 1 3 NA 0.20 0.16 -0.11 0.52 Odds Ratio 1 3 NA 1.23 NA 0.89 1.69 num.diseases - 0:2 3 1 NA -0.47 0.21 -0.89 -0.06 Odds Ratio 3 1 NA 0.62 NA 0.41 0.94 num.diseases - 1:2 3 2 NA -0.35 0.21 -0.76 0.05 Odds Ratio 3 2 NA 0.70 NA 0.47 1.06 num.diseases - 3:2 3 4 NA 0.08 0.20 -0.31 0.47 Odds Ratio 3 4 NA 1.08 NA 0.73 1.60 num.diseases - 4:2 3 5 NA 0.24 0.22 -0.20 0.67 Odds Ratio 3 5 NA 1.27 NA 0.82 1.96 Adjusted to: treat=a cholesterol=200 > # adjust to 60 when estimating > # effects of other factors > # If had not defined datadist, would have to define > # ranges for all var. > > # Estimate and test treatment (b-a) effect averaged > # over 3 cholesterols > contrast(fit, list(treat='b',cholesterol=c(150,200,250)), + list(treat='a',cholesterol=c(150,200,250)), + type='average') Contrast S.E. Lower Upper Z Pr(>|z|) 1 0.291 0.166 -0.0343 0.616 1.75 0.0796 > # Remove type='average' to get 3 separate contrasts for b-a > > # Plot effects. plot(fit) plots effects of all predictors, > # showing values used for interacting factors as subtitles > # The ref.zero parameter is helpful for showing effects of > # predictors on a common scale for comparison of strength > plot(fit, ref.zero=TRUE, ylim=c(-2,2)) > > plot(fit, age=seq(20,80,length=100), treat=NA, conf.int=FALSE) > # Plots relationship between age and log > # odds, separate curve for each treat, no C.I. > plot(fit, age=NA, cholesterol=NA) > # 3-dimensional perspective plot for age, cholesterol, and > # log odds using default ranges for both variables > plot(fit, num.diseases=NA, fun=function(x) 1/(1+exp(-x)), #or fun=plogis + ylab="Prob", conf.int=.9) > # Plot estimated probabilities instead of log odds > # Again, if no datadist were defined, would have to > # tell plot all limits > logit <- predict(fit, expand.grid(treat="b",num.diseases=1:3, + age=c(20,40,60), + cholesterol=seq(100,300,length=10))) > #logit <- predict(fit, gendata(fit, nobs=12)) > # Interactively specify 12 predictor combinations using UNIX > # For UNIX or Windows, generate 9 combinations with other variables > # set to defaults, get predicted values > logit <- predict(fit, gendata(fit, age=c(20,40,60), + treat=c('a','b','c'))) > > # Since age doesn't interact with anything, we can quickly and > # interactively try various transformations of age, > # taking the spline function of age as the gold standard. We are > # seeking a linearizing transformation. Here age is linear in the > # population so this is not very productive. Also, if we simplify the > # model the total degrees of freedom will be too small and > # confidence limits too narrow > > ag <- 10:80 > logit <- predict(fit, expand.grid(treat="a", + num.diseases=0, age=ag, + cholesterol=median(cholesterol)), + type="terms")[,"age"] > # Note: if age interacted with anything, this would be the age > # "main effect" ignoring interaction terms > # Could also use > # logit <- plot(f, age=ag, ...)$x.xbeta[,2] > # which allows evaluation of the shape for any level > # of interacting factors. When age does not interact with > # anything, the result from > # predict(f, ..., type="terms") would equal the result from > # plot if all other terms were ignored > # Could also use > # logit <- predict(fit, gendata(fit, age=ag, cholesterol=median...)) > > plot(ag^.5, logit) # try square root vs. spline transform. > plot(ag^1.5, logit) # try 1.5 power > > # w <- latex(fit) # invokes latex.lrm, creates fit.tex > # print(w) # display or print model on screen > > # Draw a nomogram for the model fit > nomogram(fit, fun=plogis, funlabel="Prob[Y=1]") > > # Compose S function to evaluate linear predictors from fit > g <- Function(fit) > g(treat='b', cholesterol=260, age=50) [1] 1.30 > # Leave num.diseases at reference value > > # Use the Hmisc dataRep function to summarize sample > # sizes for subjects as cross-classified on 2 key > # predictors > drep <- dataRep(~ roundN(age,10) + num.diseases) > print(drep, long=TRUE) Data Representativeness n=1000 dataRep(formula = ~roundN(age, 10) + num.diseases) Specifications for Matching Type Parameters age round to nearest 10 num.diseases exact numeric Unique Combinations of Descriptor Variables age num.diseases Frequency 2 30 0 11 3 40 0 44 4 50 0 68 5 60 0 53 6 70 0 11 8 20 1 1 9 30 1 10 10 40 1 57 11 50 1 75 12 60 1 43 13 70 1 11 15 20 2 1 16 30 2 21 17 40 2 47 18 50 2 85 19 60 2 43 20 70 2 22 21 80 2 2 22 20 3 2 23 30 3 17 24 40 3 48 25 50 3 88 26 60 3 63 27 70 3 13 30 30 4 8 31 40 4 47 32 50 4 61 33 60 4 40 34 70 4 7 35 80 4 1 > > # Some approaches to making a plot showing how > # predicted values vary with a continuous predictor > # on the x-axis, with two other predictors varying > > fit <- lrm(y ~ log(cholesterol - 10) + + num.diseases + rcs(age) + rcs(weight) + sex) > > combos <- gendata(fit, age=10:100, + cholesterol=c(170,200,230), + weight=c(150,200,250)) > # num.diseases, sex not specified -> set to mode > # can also used expand.grid > > combos$pred <- predict(fit, combos) > library(lattice) > xyplot(pred ~ age | cholesterol*weight, data=combos) > xYplot(pred ~ age | cholesterol, groups=weight, + data=combos, type='l') # in Hmisc Loading required package: grid > xYplot(pred ~ age, groups=interaction(cholesterol,weight), + data=combos, type='l') > > # Can also do this with plot.Design but a single > # plot may be busy: > ch <- c(170, 200, 230) > plot(fit, age=NA, cholesterol=ch, weight=150, + conf.int=FALSE) > plot(fit, age=NA, cholesterol=ch, weight=200, + conf.int=FALSE, add=TRUE) > plot(fit, age=NA, cholesterol=ch, weight=250, + conf.int=FALSE, add=TRUE) > > #Here we use plot.Design to make 9 separate plots, with CLs > d <- expand.grid(cholesterol=c(170,200,230), + weight=c(150,200,250)) > for(i in 1:nrow(d)) { + plot(fit, age=NA, cholesterol=d$cholesterol[i], + weight=d$weight[i]) + title(paste('Chol=',format(d$cholesterol[i]),' ', + 'Wt=',format(d$weight[i]),sep='')) + } > options(datadist=NULL) > > ###################### > # Detailed Example 3 # > ###################### > n <- 2000 > set.seed(731) > age <- 50 + 12*rnorm(n) > label(age) <- "Age" > sex <- factor(sample(c('Male','Female'), n, + rep=TRUE, prob=c(.6, .4))) > cens <- 15*runif(n) > h <- .02*exp(.04*(age-50)+.8*(sex=='Female')) > t <- -log(runif(n))/h > label(t) <- 'Follow-up Time' > e <- ifelse(t<=cens,1,0) > t <- pmin(t, cens) > units(t) <- "Year" > age.dec <- cut2(age, g=10, levels.mean=TRUE) > dd <- datadist(age, sex, age.dec) > options(datadist='dd') > Srv <- Surv(t,e) > > # Fit a model that doesn't assume anything except > # that deciles are adequate representations of age > f <- cph(Srv ~ strat(age.dec)+strat(sex), surv=TRUE) > # surv=TRUE speeds up computations, and confidence limits when > # there are no covariables are still accurate. > > # Plot log(-log 3-year survival probability) vs. mean age > # within age deciles and vs. sex > plot(f, age.dec=NA, sex=NA, time=3, + loglog=TRUE, val.lev=TRUE, ylim=c(-5,-1)) > > # Fit a model assuming proportional hazards for age and > # absence of age x sex interaction > f <- cph(Srv ~ rcs(age,4)+strat(sex), surv=TRUE) > survplot(f, sex=NA, n.risk=TRUE) > # Add ,age=60 after sex=NA to tell survplot use age=60 > # Validate measures of model performance using the bootstrap > # First must add data (design matrix and Srv) to fit object > f <- update(f, x=TRUE, y=TRUE) > validate(f, B=10, dxy=TRUE, u=5) # use t=5 for Dxy (only) Iteration: 1 2 3 4 5 6 7 8 9 10 index.orig training test optimism index.corrected n Dxy 0.3224 0.278818 0.271017 0.007801 0.314561 10 R2 0.0426 0.043477 0.059103 -0.015626 0.058246 10 Slope 1.0000 1.000000 1.048188 -0.048188 1.048188 10 D 0.0158 0.015902 0.013839 0.002063 0.013706 10 U -0.0004 -0.000394 0.000248 -0.000643 0.000242 10 Q 0.0162 0.016297 0.013591 0.002706 0.013463 10 > # Use B=150 in practice > # Validate model for accuracy of predicting survival at t=1 > # Get Kaplan-Meier estimates by divided subjects into groups > # of size 200 (for other values of u must put time.inc=u in > # call to cph) > cal <- calibrate(f, B=10, u=1, m=200) # B=150 in practice Using Cox survival estimates at 1 Years Averaging 1 repetitions of B= 10 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations x n events KM std.err [1,] 0.9353 200 85 0.9324 0.2775 [2,] 0.9525 200 62 0.9541 0.3335 [3,] 0.9621 200 49 0.9690 0.4086 [4,] 0.9689 200 43 0.9842 0.5774 [5,] 0.9734 200 38 0.9743 0.4474 [6,] 0.9770 200 30 0.9585 0.3538 [7,] 0.9800 200 36 0.9897 0.7072 [8,] 0.9829 200 26 0.9845 0.5776 [9,] 0.9865 200 21 0.9742 0.4474 [10,] 0.9923 200 11 0.9895 0.7072 Iteration: 1 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 2 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 3 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 4 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 5 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 6 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 7 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 8 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 9 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 10 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Mean over 1 overall replications mean.optimism mean.corrected n [1,] 0.024385 -0.027285 10 [2,] 0.002499 -0.000925 10 [3,] -0.000549 0.007481 10 [4,] 0.006833 0.008510 10 [5,] 0.001451 -0.000509 10 [6,] -0.001259 -0.017206 10 [7,] -0.000195 0.009937 10 [8,] 0.003631 -0.002014 10 [9,] 0.002904 -0.015251 10 [10,] 0.001350 -0.004211 10 > plot(cal) > # Check proportional hazards assumption for age terms > z <- cox.zph(f, 'identity') > print(z); plot(z) rho chisq p age 0.1014 5.36 0.0206 age' -0.0862 3.78 0.0518 age'' 0.0679 2.33 0.1266 GLOBAL NA 7.74 0.0518 > > # Re-fit this model without storing underlying survival > # curves for reference groups, but storing raw data with > # the fit (could also use f <- update(f, surv=FALSE, x=TRUE, y=TRUE)) > f <- cph(Srv ~ rcs(age,4)+strat(sex), x=TRUE, y=TRUE) > # Get accurate C.L. for any age > # Note: for evaluating shape of regression, we would not ordinarily > # bother to get 3-year survival probabilities - would just use X * beta > # We do so here to use same scale as nonparametric estimates > f Cox Proportional Hazards Model cph(formula = Srv ~ rcs(age, 4) + strat(sex), x = TRUE, y = TRUE) Obs Events Model L.R. d.f. P Score Score P 2000 401 79.8 3 0 76.3 0 R2 0.043 Status Stratum No Event Event sex=Female 557 226 sex=Male 1042 175 coef se(coef) z p age 0.0705 0.0236 2.984 0.00285 age' -0.0587 0.0546 -1.075 0.28220 age'' 0.1526 0.2039 0.748 0.45421 > anova(f) Wald Statistics Response: Srv Factor Chi-Square d.f. P age 68.56 3 <.0001 Nonlinear 3.56 2 0.168 TOTAL 68.56 3 <.0001 > ages <- seq(20, 80, by=4) # Evaluate at fewer points. Default is 100 > # For exact C.L. formula n=100 -> much memory > plot(f, age=ages, sex=NA, time=3, loglog=TRUE, ylim=c(-5,-1)) > > # Fit a model assuming proportional hazards for age but > # allowing for general interaction between age and sex > f <- cph(Srv ~ rcs(age,4)*strat(sex), x=TRUE, y=TRUE) > anova(f) Wald Statistics Response: Srv Factor Chi-Square d.f. P age (Factor+Higher Order Factors) 68.52 6 <.0001 All Interactions 1.94 3 0.585 Nonlinear (Factor+Higher Order Factors) 4.92 4 0.296 age * sex (Factor+Higher Order Factors) 1.94 3 0.585 Nonlinear 1.90 2 0.386 Nonlinear Interaction : f(A,B) vs. AB 1.90 2 0.386 TOTAL NONLINEAR 4.92 4 0.296 TOTAL NONLINEAR + INTERACTION 4.92 5 0.426 TOTAL 68.52 6 <.0001 > ages <- seq(20, 80, by=6) > # Still fewer points - more parameters in model > > # Plot 3-year survival probability (log-log and untransformed) > # vs. age and sex, obtaining accurate confidence limits > plot(f, age=ages, sex=NA, time=3, loglog=TRUE, ylim=c(-5,-1)) > plot(f, age=ages, sex=NA, time=3) > # Having x=TRUE, y=TRUE in fit also allows computation of influence stats > r <- resid(f, "dfbetas") > which.influence(f) $age [1] "177" "350" "734" "918" "1011" "1906" $"age * sex" [1] "352" "710" "918" "1113" "1387" > # Use survest to estimate 3-year survival probability and > # confidence limits for selected subjects > survest(f, expand.grid(age=c(20,40,60), sex=c('Female','Male')), + times=c(2,4,6), conf.int=.95) $time [1] 2 4 6 $surv 2 4 6 1 0.975 0.954 0.925 2 0.942 0.895 0.832 3 0.879 0.786 0.672 4 0.996 0.993 0.990 5 0.968 0.943 0.919 6 0.937 0.890 0.846 $std.err 2 4 6 1 0.5006 0.4891 0.4737 2 0.1119 0.1060 0.0977 3 0.0661 0.0587 0.0504 4 0.7669 0.7641 0.7608 5 0.1195 0.1160 0.1128 6 0.0887 0.0838 0.0792 $lower 2 4 6 1 0.933 0.879 0.808 2 0.928 0.869 0.793 3 0.862 0.757 0.631 4 0.983 0.969 0.956 5 0.959 0.928 0.899 6 0.925 0.869 0.818 $upper 2 4 6 1 0.991 0.983 0.972 2 0.954 0.916 0.864 3 0.895 0.813 0.709 4 0.999 0.998 0.998 5 0.975 0.955 0.936 6 0.947 0.908 0.870 $strata [1] 1 1 1 2 2 2 attr(,"levels") [1] "sex=Female" "sex=Male" > > # Create an S function srv that computes fitted > # survival probabilities on demand, for non-interaction model > f <- cph(Srv ~ rcs(age,4)+strat(sex), surv=TRUE) > srv <- Survival(f) > # Define functions to compute 3-year estimates as a function of > # the linear predictors (X*Beta) > surv.f <- function(lp) srv(3, lp, stratum="sex=Female") > surv.m <- function(lp) srv(3, lp, stratum="sex=Male") > # Create a function that computes quantiles of survival time > # on demand > quant <- Quantile(f) > # Define functions to compute median survival time > med.f <- function(lp) quant(.5, lp, stratum="sex=Female") > med.m <- function(lp) quant(.5, lp, stratum="sex=Male") > # Draw a nomogram to compute several types of predicted values > nomogram(f, fun=list(surv.m, surv.f, med.m, med.f), + funlabel=c("S(3 | Male)","S(3 | Female)", + "Median (Male)","Median (Female)"), + fun.at=list(c(.8,.9,.95,.98,.99),c(.1,.3,.5,.7,.8,.9,.95,.98), + c(8,12),c(1,2,4,8,12))) Warning in approx(fu[s], xseq[s], fat) : collapsing to unique 'x' values Warning in approx(fu[s], xseq[s], fat) : collapsing to unique 'x' values > options(datadist=NULL) > > ######################################################## > # Simple examples using small datasets for checking # > # calculations across different systems in which random# > # number generators cannot be synchronized. # > ######################################################## > > x1 <- 1:20 > x2 <- abs(x1-10) > x3 <- factor(rep(0:2,length.out=20)) > y <- c(rep(0:1,8),1,1,1,1) > dd <- datadist(x1,x2,x3) > options(datadist='dd') > f <- lrm(y ~ rcs(x1,3) + x2 + x3) > f Logistic Regression Model lrm(formula = y ~ rcs(x1, 3) + x2 + x3) Frequencies of Responses 0 1 8 12 Obs Max Deriv Model L.R. d.f. P C Dxy 20 4e-06 5.14 5 0.399 0.74 0.479 Gamma Tau-a R2 Brier 0.479 0.242 0.306 0.192 Coef S.E. Wald Z P Intercept 12.5719 14.387 0.87 0.382 x1 -1.3999 1.537 -0.91 0.362 x1' 2.2669 2.267 1.00 0.317 x2 -1.2581 1.412 -0.89 0.373 x3=1 0.8049 1.276 0.63 0.528 x3=2 -0.1263 1.283 -0.10 0.922 > specs(f, TRUE) lrm(formula = y ~ rcs(x1, 3) + x2 + x3) Assumption Parameters d.f. x1 rcspline 5 10.5 16 2 x2 asis 1 x3 category 0 1 2 2 x1 x2 x3 Low:effect 5.75 2.75 Adjust to 10.50 5.00 0 High:effect 15.25 7.25 Low:prediction 1.95 0.95 0 High:prediction 19.05 9.05 2 Low 1.00 0.00 0 High 20.00 10.00 2 > anova(f) Wald Statistics Response: y Factor Chi-Square d.f. P x1 2.32 2 0.314 Nonlinear 1.00 1 0.317 x2 0.79 1 0.373 x3 0.62 2 0.735 TOTAL 2.74 5 0.740 > anova(f, x1, x2) Wald Statistics Response: y Factor Chi-Square d.f. P x1 2.32 2 0.314 Nonlinear 1.00 1 0.317 x2 0.79 1 0.373 TOTAL 2.37 3 0.499 > plot(anova(f)) > s <- summary(f) > s Effects Response : y Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95 x1 5.75 15.25 9.5 2.85 1.93 -0.94 6.64 Odds Ratio 5.75 15.25 9.5 17.34 NA 0.39 767.23 x2 2.75 7.25 4.5 -5.66 6.35 -18.11 6.79 Odds Ratio 2.75 7.25 4.5 0.00 NA 0.00 887.93 x3 - 1:0 1.00 2.00 NA 0.80 1.28 -1.70 3.31 Odds Ratio 1.00 2.00 NA 2.24 NA 0.18 27.29 x3 - 2:0 1.00 3.00 NA -0.13 1.28 -2.64 2.39 Odds Ratio 1.00 3.00 NA 0.88 NA 0.07 10.89 > plot(s, log=TRUE) > par(mfrow=c(2,2)) > plot(f) > par(mfrow=c(1,1)) > nomogram(f) > g <- Function(f) > g(11,7,'1') [1] -6.79 > contrast(f, list(x1=11,x2=7,x3='1'), list(x1=10,x2=6,x3='2')) Contrast S.E. Lower Upper Z Pr(>|z|) 1 -0.0266 1.80 -3.55 3.50 -0.01 0.988 > fastbw(f) Deleted Chi-Sq d.f. P Residual d.f. P AIC x3 0.62 2 0.735 0.62 2 0.735 -3.38 x1 2.07 2 0.355 2.69 4 0.611 -5.31 x2 0.05 1 0.818 2.74 5 0.740 -7.26 Approximate Estimates after Deleting Factors Coef S.E. Wald Z P [1,] 0.1387 0.5163 0.2685 0.7883 Factors in Final Model None > gendata(f, x1=1:5) x1 x2 x3 1 1 5 0 2 2 5 0 3 3 5 0 4 4 5 0 5 5 5 0 > # w <- latex(f) > > f <- update(f, x=TRUE,y=TRUE) > which.influence(f) $Intercept [1] "6" "9" "10" "11" "12" "13" "14" "15" $x1 [1] "6" "9" "10" "11" "12" "13" "14" "15" $x2 [1] "4" "6" "9" "10" "11" "12" "13" "14" "15" $x3 [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12" "13" "14" "15" > residuals(f,'gof') Sum of squared errors Expected value|H0 SD 3.838 3.751 0.142 Z P 0.614 0.539 > robcov(f)$var Intercept x1 x1' x2 x3=1 x3=2 Intercept 120.23 -12.790 18.536 -11.869 3.084 -1.239 x1 -12.79 1.373 -1.993 1.262 -0.372 0.081 x1' 18.54 -1.993 2.912 -1.840 0.561 -0.114 x2 -11.87 1.262 -1.840 1.193 -0.388 0.055 x3=1 3.08 -0.372 0.561 -0.388 1.401 0.529 x3=2 -1.24 0.081 -0.114 0.055 0.529 1.612 > validate(f, B=10) Iteration: 1 2 3 4 5 6 7 8 9 10 Divergence or singularity in 1 samples index.orig training test optimism index.corrected n Dxy 0.479 0.670 0.394 0.2766 0.2026 9 R2 0.306 0.491 0.158 0.3335 -0.0271 9 Intercept 0.000 0.000 0.374 -0.3737 0.3737 9 Slope 1.000 1.000 0.411 0.5888 0.4112 9 Emax 0.000 0.000 0.271 0.2709 0.2709 9 D 0.207 0.449 0.076 0.3727 -0.1656 9 U -0.100 -0.100 5.324 -5.4244 5.3244 9 Q 0.307 0.549 -5.248 5.7970 -5.4900 9 B 0.192 0.145 0.238 -0.0926 0.2845 9 > cal <- calibrate(f, B=10) Iteration: 1 2 3 4 5 6 7 8 9 10 > plot(cal) n=20 Mean absolute error=0.103 Mean squared error=0.0157 0.9 Quantile of absolute error=0.218 > > f <- ols(y ~ rcs(x1,3) + x2 + x3, x=TRUE, y=TRUE) > anova(f) Analysis of Variance Response: y Factor d.f. Partial SS MS F P x1 2 0.6080 0.3040 1.10 0.359 Nonlinear 1 0.1139 0.1139 0.41 0.530 x2 1 0.0845 0.0845 0.31 0.588 x3 2 0.0980 0.0490 0.18 0.839 REGRESSION 5 0.9454 0.1891 0.69 0.641 ERROR 14 3.8546 0.2753 > anova(f, x1, x2) Analysis of Variance Response: y Factor d.f. Partial SS MS F P x1 2 0.6080 0.3040 1.10 0.359 Nonlinear 1 0.1139 0.1139 0.41 0.530 x2 1 0.0845 0.0845 0.31 0.588 REGRESSION 3 0.7883 0.2628 0.95 0.441 ERROR 14 3.8546 0.2753 > plot(anova(f)) > s <- summary(f) > s Effects Response : y Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95 x1 5.75 15.25 9.5 0.40 0.29 -0.16 0.96 x2 2.75 7.25 4.5 -0.74 1.33 -3.35 1.88 x3 - 1:0 1.00 2.00 NA 0.12 0.28 -0.43 0.68 x3 - 2:0 1.00 3.00 NA -0.04 0.30 -0.62 0.54 > plot(s, log=TRUE) > par(mfrow=c(2,2)) > plot(f) > par(mfrow=c(1,1)) > nomogram(f) > g <- Function(f) > g(11,7,'1') [1] -0.341 > contrast(f, list(x1=11,x2=7,x3='1'), list(x1=10,x2=6,x3='2')) Contrast S.E. Lower Upper t Pr(>|t|) 1 0.0431 0.416 -0.849 0.935 0.1 0.919 Error d.f.= 14 > fastbw(f) Deleted Chi-Sq d.f. P Residual d.f. P AIC R2 x3 0.36 2 0.837 0.36 2 0.837 -3.64 0.177 x1 2.31 2 0.315 2.66 4 0.615 -5.34 0.044 x2 0.77 1 0.381 3.43 5 0.633 -6.57 0.000 Approximate Estimates after Deleting Factors Coef S.E. Wald Z P [1,] 0.6 0.1173 5.114 3.158e-07 Factors in Final Model None > gendata(f, x1=1:5) x1 x2 x3 1 1 5 0 2 2 5 0 3 3 5 0 4 4 5 0 5 5 5 0 > # w <- latex(f) > > f <- update(f, x=TRUE,y=TRUE) > which.influence(f) $Intercept [1] 9 10 11 12 13 14 15 $x1 [1] 9 10 12 13 14 15 20 $x2 [1] 9 10 11 13 14 15 $x3 [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 > residuals(f,'dfbetas') [,1] [,2] [,3] [,4] [,5] [,6] [1,] -0.0936 0.0919 -0.05658 -0.01012 0.32639 0.27234 [2,] 0.0653 -0.0882 0.05533 -0.00718 0.37994 0.05626 [3,] -0.0578 0.0742 -0.05248 0.01802 -0.02299 -0.34424 [4,] -0.0509 0.0782 -0.10175 0.12113 -0.36849 -0.35476 [5,] 0.0670 -0.0672 0.08629 -0.08764 -0.34359 0.00142 [6,] -0.0642 0.0668 -0.08449 0.08093 0.01397 0.45502 [7,] 0.0777 -0.1116 0.11752 -0.09713 0.24649 0.26189 [8,] 0.0329 -0.0145 0.00881 -0.04448 0.30022 -0.01865 [9,] -0.3838 0.3611 -0.36027 0.40431 -0.03441 -0.40693 [10,] 1.2161 -1.1185 1.12853 -1.22468 -0.42513 -0.39218 [11,] -0.2090 0.1707 -0.17653 0.25217 -0.46282 0.02633 [12,] -0.2109 0.2320 -0.22419 0.18175 -0.01904 0.35785 [13,] 0.4742 -0.5273 0.51571 -0.47613 0.40619 0.44833 [14,] -0.3388 0.3478 -0.34018 0.32129 0.22391 -0.06502 [15,] 0.3526 -0.3519 0.33667 -0.33594 0.04203 -0.36204 [16,] -0.1551 0.1730 -0.16172 0.16361 -0.23284 -0.22553 [17,] -0.0156 0.0139 -0.01075 0.01373 0.05527 -0.00131 [18,] 0.0396 -0.0477 0.05557 -0.04031 -0.00693 0.15061 [19,] 0.0133 -0.0128 0.01473 -0.01136 -0.02656 -0.02050 [20,] -0.1792 0.1954 -0.20883 0.17801 -0.17176 -0.04267 > robcov(f)$var Intercept x1 x1' x2 x3=1 x3=2 Intercept 5.1283 -0.53514 0.75372 -0.50713 0.0810 0.02854 x1 -0.5351 0.05642 -0.07939 0.05284 -0.0102 -0.00534 x1' 0.7537 -0.07939 0.11220 -0.07502 0.0143 0.00774 x2 -0.5071 0.05284 -0.07502 0.05121 -0.0107 -0.00542 x3=1 0.0810 -0.01022 0.01429 -0.01072 0.0506 0.02366 x3=2 0.0285 -0.00534 0.00774 -0.00542 0.0237 0.06305 > validate(f, B=10) Iteration: 1 2 3 4 5 6 7 8 9 10 index.orig training test optimism index.corrected n R-square 0.197 0.415 -0.276 0.691 -0.494 10 MSE 0.193 0.137 0.306 -0.170 0.362 10 Intercept 0.000 0.000 0.269 -0.269 0.269 10 Slope 1.000 1.000 0.529 0.471 0.529 10 > cal <- calibrate(f, B=10) Iteration: 1 2 3 4 5 6 7 8 9 10 > plot(cal) n=20 Mean absolute error=0.056 Mean squared error=0.00416 0.9 Quantile of absolute error=0.110 > > S <- Surv(c(1,4,2,3,5,8,6,7,20,18,19,9,12,10,11,13,16,14,15,17)) > survplot(survfit(S ~ x3)) > f <- psm(S ~ rcs(x1,3)+x2+x3, x=TRUE,y=TRUE) > f Parametric Survival Model: Weibull Distribution psm(formula = S ~ rcs(x1, 3) + x2 + x3, x = TRUE, y = TRUE) Obs Events Model L.R. d.f. P R2 20 20 39.7 5 0 0.86 Value Std. Error z p (Intercept) 5.595 1.406 3.98 6.95e-05 x1 -0.315 0.144 -2.18 2.90e-02 x1' 0.595 0.204 2.91 3.57e-03 x2 -0.509 0.135 -3.76 1.70e-04 x3=1 0.248 0.133 1.86 6.22e-02 x3=2 0.226 0.134 1.69 9.07e-02 Log(scale) -1.548 0.184 -8.42 3.77e-17 Scale= 0.213 > # NOTE: LR chi-sq of 39.67 disagrees with that from old survreg > # and old psm (77.65); suspect were also testing sigma=1 > > for(w in c('survival','hazard')) + print(survest(f, data.frame(x1=7,x2=3,x3='1'), + times=c(5,7), conf.int=.95, what=w)) N: 20 Events: 20 Time survival Lower Upper SE 1 5 0.925 0.722 0.981 0.729 2 7 0.684 0.323 0.880 0.556 Warning in survest.psm(f, data.frame(x1 = 7, x2 = 3, x3 = "1"), times = c(5, : conf.int ignored for what="hazard" N: 20 Events: 20 Time hazard 1 5 0.0734 2 7 0.2550 > # S-Plus 2000 using old survival library: > # S(t):.925 .684 SE:0.729 0.556 Hazard:0.0734 0.255 > > plot(f, x1=NA, time=5) > f$var (Intercept) x1 x1' x2 x3=1 x3=2 (Intercept) 1.97814 -0.201945 0.285110 -0.18836 -0.05242 -0.057829 x1 -0.20194 0.020788 -0.029378 0.01923 0.00485 0.004839 x1' 0.28511 -0.029378 0.041700 -0.02735 -0.00611 -0.006385 x2 -0.18836 0.019231 -0.027353 0.01831 0.00334 0.004776 x3=1 -0.05242 0.004851 -0.006114 0.00334 0.01762 0.008531 x3=2 -0.05783 0.004839 -0.006385 0.00478 0.00853 0.017852 Log(scale) -0.00138 -0.000078 0.000575 -0.00067 0.00161 -0.000767 Log(scale) (Intercept) -0.001379 x1 -0.000078 x1' 0.000575 x2 -0.000670 x3=1 0.001613 x3=2 -0.000767 Log(scale) 0.033807 > set.seed(3) > # robcov(f)$var when score residuals implemented > bootcov(f, B=30)$var Warning in survreg.fit2(x, y, dist = dist, parms = parms, fixed = fixed, : Ran out of iterations and did not converge Warning in bootcov(f, B = 30) : fit failure in 1 resamples. Might try increasing maxit (Intercept) x1 x1' x2 x3=1 x3=2 log scale (Intercept) 4.44053 -0.45064 0.64240 -0.430944 -0.07468 -0.11140 0.009966 x1 -0.45064 0.04620 -0.06563 0.043312 0.00676 0.01074 -0.002065 x1' 0.64240 -0.06563 0.09402 -0.062694 -0.01098 -0.01478 0.002747 x2 -0.43094 0.04331 -0.06269 0.043460 0.00623 0.00768 -0.000885 x3=1 -0.07468 0.00676 -0.01098 0.006231 0.02989 0.01552 0.010113 x3=2 -0.11140 0.01074 -0.01478 0.007676 0.01552 0.03733 0.017960 log scale 0.00997 -0.00207 0.00275 -0.000885 0.01011 0.01796 0.081652 > validate(f, B=10) Iteration: 1 2 3 4 5 6 7 8 9 10 index.orig training test optimism index.corrected n R2 0.8640 0.8922 0.8086 0.083569 0.7805 10 Intercept 0.0000 0.0000 0.0237 -0.023697 0.0237 10 Slope 1.0000 1.0000 1.0010 -0.000957 1.0010 10 D 0.3069 0.3618 0.2587 0.103170 0.2038 10 U -0.0159 -0.0163 0.0409 -0.057284 0.0414 10 Q 0.3228 0.3782 0.2177 0.160454 0.1624 10 > cal <- calibrate(f, u=5, B=10, m=10) Averaging 1 repetitions of B= 10 Warning in groupkm(psurv, Surv(inverse(y[, 1]), y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations x n events KM std.err [1,] 0.5069 10 10 0.5 0.4562 [2,] 0.9945 10 10 1.0 0.0000 Iteration: 1 Warning in groupkm(psurv, Surv(inverse(y[, 1]), y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(psurv, Surv(inverse(y[, 1]), y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 2 Warning in groupkm(psurv, Surv(inverse(y[, 1]), y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(psurv, Surv(inverse(y[, 1]), y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 3 Warning in groupkm(psurv, Surv(inverse(y[, 1]), y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(psurv, Surv(inverse(y[, 1]), y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 4 Warning in groupkm(psurv, Surv(inverse(y[, 1]), y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(psurv, Surv(inverse(y[, 1]), y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 5 Warning in groupkm(psurv, Surv(inverse(y[, 1]), y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(psurv, Surv(inverse(y[, 1]), y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 6 Warning in groupkm(psurv, Surv(inverse(y[, 1]), y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(psurv, Surv(inverse(y[, 1]), y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 7 Warning in groupkm(psurv, Surv(inverse(y[, 1]), y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(psurv, Surv(inverse(y[, 1]), y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 8 Warning in groupkm(psurv, Surv(inverse(y[, 1]), y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(psurv, Surv(inverse(y[, 1]), y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 9 Warning in groupkm(psurv, Surv(inverse(y[, 1]), y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(psurv, Surv(inverse(y[, 1]), y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 10 Warning in groupkm(psurv, Surv(inverse(y[, 1]), y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(psurv, Surv(inverse(y[, 1]), y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations index.orig training test optimism index.corrected n [1,] -0.00691 -0.01774 -0.01821 4.72e-04 -0.00739 9 [2,] 0.00548 0.00304 0.00307 -3.56e-05 0.00552 10 Mean over 1 overall replications mean.optimism mean.corrected n [1,] 4.72e-04 -0.00739 9 [2,] -3.56e-05 0.00552 10 > plot(cal) > r <- resid(f) > survplot(r) > > f <- cph(S ~ rcs(x1,3)+x2+x3, x=TRUE,y=TRUE,surv=TRUE,time.inc=5) > f Cox Proportional Hazards Model cph(formula = S ~ rcs(x1, 3) + x2 + x3, x = TRUE, y = TRUE, surv = TRUE, time.inc = 5) Obs Events Model L.R. d.f. P Score Score P 20 20 35.4 5 0 37.8 0 R2 0.842 coef se(coef) z p x1 2.41 1.174 2.06 0.03972 x1' -4.13 1.707 -2.42 0.01548 x2 3.35 1.209 2.77 0.00562 x3=1 -1.07 0.687 -1.56 0.11802 x3=2 -1.10 0.767 -1.44 0.15108 > plot(f, x1=NA, time=5) > robcov(f)$var x1 x1' x2 x3=1 x3=2 x1 1.3954 -2.0182 1.439 -0.0672 0.434 x1' -2.0182 2.9262 -2.092 0.0986 -0.611 x2 1.4391 -2.0919 1.505 -0.0901 0.409 x3=1 -0.0672 0.0986 -0.090 0.3518 0.123 x3=2 0.4339 -0.6106 0.409 0.1235 0.621 > bootcov(f, B=10) Cox Proportional Hazards Model cph(formula = S ~ rcs(x1, 3) + x2 + x3, x = TRUE, y = TRUE, surv = TRUE, time.inc = 5) Obs Events Model L.R. d.f. P Score Score P 20 20 35.4 5 0 37.8 0 R2 0.842 coef se(coef) z p x1 2.41 2.94 0.821 0.412 x1' -4.13 6.11 -0.676 0.499 x2 3.35 5.30 0.632 0.528 x3=1 -1.07 2.45 -0.438 0.661 x3=2 -1.10 7.58 -0.145 0.885 > validate(f, B=10) Iteration: 1 2 3 4 5 6 7 8 9 10 index.orig training test optimism index.corrected n R2 0.8417 0.8629 0.7572 0.106 0.7361 10 Slope 1.0000 1.0000 0.5942 0.406 0.5942 10 D 0.4061 0.4590 0.3213 0.138 0.2684 10 U -0.0236 -0.0236 0.3382 -0.362 0.3382 10 Q 0.4297 0.4826 -0.0169 0.499 -0.0698 10 > cal <- calibrate(f, u=5, B=10, m=10) Using Cox survival estimates at 5 Days Averaging 1 repetitions of B= 10 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations x n events KM std.err [1,] 0.5051 10 10 0.5 0.4562 [2,] 0.9941 10 10 1.0 0.0000 Iteration: 1 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 2 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 3 Warning in fitter(X, Y, strata = Strata, offset = offset, weights = weights, : Ran out of iterations and did not converge Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 4 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 5 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 6 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 7 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 8 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 9 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 10 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Mean over 1 overall replications mean.optimism mean.corrected n [1,] -0.00411 -0.000971 9 [2,] 0.00542 0.000431 10 > survplot(f, x1=c(2,19)) > options(datadist=NULL) > > > > graphics::par(get("par.postscript", env = .CheckExEnv)) > cleanEx(); ..nameEx <- "anova.Design" > > ### * anova.Design > > flush(stderr()); flush(stdout()) > > ### Name: anova.Design > ### Title: Analysis of Variance (Wald and F Statistics) > ### Aliases: anova.Design print.anova.Design text.anova.Design > ### plot.anova.Design latex.anova.Design > ### Keywords: models regression htest aplot > > ### ** Examples > > n <- 1000 # define sample size > set.seed(17) # so can reproduce the results > treat <- factor(sample(c('a','b','c'), n,TRUE)) > num.diseases <- sample(0:4, n,TRUE) > age <- rnorm(n, 50, 10) > cholesterol <- rnorm(n, 200, 25) > weight <- rnorm(n, 150, 20) > sex <- factor(sample(c('female','male'), n,TRUE)) > label(age) <- 'Age' # label is in Hmisc > label(num.diseases) <- 'Number of Comorbid Diseases' > label(cholesterol) <- 'Total Cholesterol' > label(weight) <- 'Weight, lbs.' > label(sex) <- 'Sex' > units(cholesterol) <- 'mg/dl' # uses units.default in Hmisc > > # Specify population model for log odds that Y=1 > L <- .1*(num.diseases-2) + .045*(age-50) + + (log(cholesterol - 10)-5.2)*(-2*(treat=='a') + + 3.5*(treat=='b')+2*(treat=='c')) > # Simulate binary y to have Prob(y=1) = 1/[1+exp(-L)] > y <- ifelse(runif(n) < plogis(L), 1, 0) > > fit <- lrm(y ~ treat + scored(num.diseases) + rcs(age) + + log(cholesterol+10) + treat:log(cholesterol+10)) > anova(fit) # Test all factors Wald Statistics Response: y Factor Chi-Square d.f. P treat (Factor+Higher Order Factors) 15.88 4 0.0032 All Interactions 10.79 2 0.0045 num.diseases 14.91 4 0.0049 Nonlinear 0.73 3 0.8660 age 67.97 4 <.0001 Nonlinear 1.11 3 0.7738 cholesterol (Factor+Higher Order Factors) 12.99 3 0.0047 All Interactions 10.79 2 0.0045 treat * cholesterol (Factor+Higher Order Factors) 10.79 2 0.0045 TOTAL NONLINEAR 2.03 6 0.9168 TOTAL NONLINEAR + INTERACTION 13.20 8 0.1051 TOTAL 90.80 13 <.0001 > anova(fit, treat, cholesterol) # Test these 2 by themselves Wald Statistics Response: y Factor Chi-Square d.f. P treat (Factor+Higher Order Factors) 15.9 4 0.0032 All Interactions 10.8 2 0.0045 cholesterol (Factor+Higher Order Factors) 13.0 3 0.0047 All Interactions 10.8 2 0.0045 TOTAL 17.9 5 0.0031 > # to get their pooled effects > g <- lrm(y ~ treat*rcs(age)) > dd <- datadist(treat, num.diseases, age, cholesterol) > options(datadist='dd') > plot(g, age=NA, treat="b") > s <- anova(g) > print(s) Wald Statistics Response: y Factor Chi-Square d.f. P treat (Factor+Higher Order Factors) 5.62 10 0.846 All Interactions 1.30 8 0.996 age (Factor+Higher Order Factors) 65.99 12 <.0001 All Interactions 1.30 8 0.996 Nonlinear (Factor+Higher Order Factors) 2.23 9 0.987 treat * age (Factor+Higher Order Factors) 1.30 8 0.996 Nonlinear 0.99 6 0.986 Nonlinear Interaction : f(A,B) vs. AB 0.99 6 0.986 TOTAL NONLINEAR 2.23 9 0.987 TOTAL NONLINEAR + INTERACTION 2.57 11 0.995 TOTAL 69.06 14 <.0001 > #p <- locator(1) # click mouse at upper left corner of table > p <- list(x=32,y=2.1) > text(s, at=p) # add anova table to regression plot > plot(s) # new plot - dot chart of chisq-d.f. > # latex(s) # nice printout - creates anova.g.tex > options(datdist=NULL) > > > # Simulate data with from a given model, and display exactly which > # hypotheses are being tested > > set.seed(123) > age <- rnorm(500, 50, 15) > treat <- factor(sample(c('a','b','c'), 500,TRUE)) > bp <- rnorm(500, 120, 10) > y <- ifelse(treat=='a', (age-50)*.05, abs(age-50)*.08) + 3*(treat=='c') + + pmax(bp, 100)*.09 + rnorm(500) > f <- ols(y ~ treat*lsp(age,50) + rcs(bp,4)) > print(names(coef(f)), quote=FALSE) [1] Intercept treat=b treat=c age age' [6] bp bp' bp'' treat=b * age treat=c * age [11] treat=b * age' treat=c * age' > specs(f) ols(formula = y ~ treat * lsp(age, 50) + rcs(bp, 4)) Assumption Parameters d.f. treat category a b c 2 age lspline 50 2 bp rcspline 103.28 116.6 123.63 137.53 3 treat * age interaction linear x nonlinear - Ag(B) 4 > anova(f) Analysis of Variance Response: y Factor d.f. Partial SS MS F treat (Factor+Higher Order Factors) 6 1421.70 236.950 241.73 All Interactions 4 61.55 15.387 15.70 age (Factor+Higher Order Factors) 6 222.01 37.001 37.75 All Interactions 4 61.55 15.387 15.70 Nonlinear (Factor+Higher Order Factors) 3 156.88 52.294 53.35 bp 3 344.33 114.778 117.09 Nonlinear 2 1.41 0.706 0.72 treat * age (Factor+Higher Order Factors) 4 61.55 15.387 15.70 Nonlinear 2 22.87 11.436 11.67 Nonlinear Interaction : f(A,B) vs. AB 2 22.87 11.436 11.67 TOTAL NONLINEAR 5 157.75 31.550 32.19 TOTAL NONLINEAR + INTERACTION 7 194.53 27.790 28.35 REGRESSION 11 1861.11 169.192 172.61 ERROR 488 478.35 0.980 P <.0001 <.0001 <.0001 <.0001 <.0001 <.0001 0.487 <.0001 <.0001 <.0001 <.0001 <.0001 <.0001 > an <- anova(f) > options(digits=3) > print(an, 'subscripts') Analysis of Variance Response: y Factor d.f. Partial SS MS F treat (Factor+Higher Order Factors) 6 1421.70 236.950 241.73 All Interactions 4 61.55 15.387 15.70 age (Factor+Higher Order Factors) 6 222.01 37.001 37.75 All Interactions 4 61.55 15.387 15.70 Nonlinear (Factor+Higher Order Factors) 3 156.88 52.294 53.35 bp 3 344.33 114.778 117.09 Nonlinear 2 1.41 0.706 0.72 treat * age (Factor+Higher Order Factors) 4 61.55 15.387 15.70 Nonlinear 2 22.87 11.436 11.67 Nonlinear Interaction : f(A,B) vs. AB 2 22.87 11.436 11.67 TOTAL NONLINEAR 5 157.75 31.550 32.19 TOTAL NONLINEAR + INTERACTION 7 194.53 27.790 28.35 REGRESSION 11 1861.11 169.192 172.61 ERROR 488 478.35 0.980 P Tested <.0001 1-2,8-11 <.0001 8-11 <.0001 3-4,8-11 <.0001 8-11 <.0001 4,10-11 <.0001 5-7 0.487 6-7 <.0001 8-11 <.0001 10-11 <.0001 10-11 <.0001 4,6-7,10-11 <.0001 4,6-11 <.0001 1-11 Subscripts correspond to: [1] treat=b treat=c age age' bp [6] bp' bp'' treat=b * age treat=c * age treat=b * age' [11] treat=c * age' > print(an, 'dots') Analysis of Variance Response: y Factor d.f. Partial SS MS F treat (Factor+Higher Order Factors) 6 1421.70 236.950 241.73 All Interactions 4 61.55 15.387 15.70 age (Factor+Higher Order Factors) 6 222.01 37.001 37.75 All Interactions 4 61.55 15.387 15.70 Nonlinear (Factor+Higher Order Factors) 3 156.88 52.294 53.35 bp 3 344.33 114.778 117.09 Nonlinear 2 1.41 0.706 0.72 treat * age (Factor+Higher Order Factors) 4 61.55 15.387 15.70 Nonlinear 2 22.87 11.436 11.67 Nonlinear Interaction : f(A,B) vs. AB 2 22.87 11.436 11.67 TOTAL NONLINEAR 5 157.75 31.550 32.19 TOTAL NONLINEAR + INTERACTION 7 194.53 27.790 28.35 REGRESSION 11 1861.11 169.192 172.61 ERROR 488 478.35 0.980 P Tested <.0001 .. .... <.0001 .... <.0001 .. .... <.0001 .... <.0001 . .. <.0001 ... 0.487 .. <.0001 .... <.0001 .. <.0001 .. <.0001 . .. .. <.0001 . ...... <.0001 ........... Subscripts correspond to: [1] treat=b treat=c age age' bp [6] bp' bp'' treat=b * age treat=c * age treat=b * age' [11] treat=c * age' > > an <- anova(f, test='Chisq', ss=FALSE) > plot(0:1) # make some plot > text(an, at=list(x=1.5,y=.6)) # add anova table to plot > plot(an) # new plot - dot chart of chisq-d.f. > # latex(an) # nice printout - creates anova.f.tex > > # Suppose that a researcher wants to make a big deal about a variable > # because it has the highest adjusted chi-square. We use the > # bootstrap to derive 0.95 confidence intervals for the ranks of all > # the effects in the model. We use the plot method for anova, with > # pl=FALSE to suppress actual plotting of chi-square - d.f. for each > # bootstrap repetition. We rank the negative of the adjusted > # chi-squares so that a rank of 1 is assigned to the highest. > # It is important to tell plot.anova.Design not to sort the results, > # or every bootstrap replication would have ranks of 1,2,3 for the stats. > > mydata <- data.frame(x1=runif(200), x2=runif(200), + sex=factor(sample(c('female','male'),200,TRUE))) > set.seed(9) # so can reproduce example > mydata$y <- ifelse(runif(200)<=plogis(mydata$x1-.5 + .5*(mydata$x2-.5) + + .5*(mydata$sex=='male')),1,0) > > if(.R.) { + library(boot) + b <- boot(mydata, function(data, i, ...) rank(-plot(anova( + lrm(y ~ rcs(x1,4)+pol(x2,2)+sex,data,subset=i)), + sort='none', pl=FALSE)), + R=25) # should really do R=500 but will take a while + Rank <- b$t0 + lim <- t(apply(b$t, 2, quantile, probs=c(.025,.975))) + } else { + b <- bootstrap(mydata, rank(-plot(anova( + lrm(y ~ rcs(x1,4)+pol(x2,2)+sex,mydata)), sort='none', pl=FALSE)), + B=25) # should really do B=500 but will take a while + Rank <- b$observed + lim <- limits.emp(b)[,c(1,4)] # get 0.025 and 0.975 quantiles + } Attaching package: 'boot' The following object(s) are masked from package:survival : aml > > # Use the Hmisc Dotplot function to display ranks and their confidence > # intervals. Sort the categories by descending adj. chi-square, for ranks > original.chisq <- plot(anova(lrm(y ~ rcs(x1,4)+pol(x2,2)+sex,data=mydata)), + sort='none', pl=FALSE) > predictor <- as.factor(names(original.chisq)) > predictor <- reorder.factor(predictor, -original.chisq) > > Dotplot(predictor ~ Cbind(Rank, lim), pch=3, xlab='Rank', + main=if(.R.) expression(paste( + 'Ranks and 0.95 Confidence Limits for ',chi^2,' - d.f.')) else + 'Ranks and 0.95 Confidence Limits for Chi-square - d.f.') Loading required package: grid Loading required package: lattice Attaching package: 'lattice' The following object(s) are masked from package:boot : melanoma > > > > cleanEx(); ..nameEx <- "bj" > > ### * bj > > flush(stderr()); flush(stdout()) > > ### Name: bj > ### Title: Buckley-James Multiple Regression Model > ### Aliases: bj bj.fit residuals.bj print.bj validate.bj bjplot > ### Keywords: models survival > > ### ** Examples > > set.seed(1) > ftime <- 10*rexp(200) > stroke <- ifelse(ftime > 10, 0, 1) > ftime <- pmin(ftime, 10) > units(ftime) <- "Month" > age <- rnorm(200, 70, 10) > hospital <- factor(sample(c('a','b'),200,TRUE)) > dd <- datadist(age, hospital) > options(datadist="dd") > > f <- bj(Surv(ftime, stroke) ~ rcs(age,5) + hospital, x=TRUE, y=TRUE) > # add link="identity" to use a censored normal regression model instead > # of a lognormal one > anova(f) Wald Statistics Response: Surv(ftime, stroke) Factor Chi-Square d.f. P age 9.13 4 0.0580 Nonlinear 9.10 3 0.0279 hospital 1.43 1 0.2325 TOTAL 11.53 5 0.0418 > fastbw(f) Deleted Chi-Sq d.f. P Residual d.f. P AIC hospital 1.43 1 0.233 1.43 1 0.233 -0.57 Approximate Estimates after Deleting Factors Coef S.E. Wald Z P Intercept -1.51928 1.95897 -0.7756 0.438013 age 0.06533 0.03491 1.8715 0.061270 age' -0.49754 0.17772 -2.7995 0.005117 age'' 4.97246 1.66575 2.9851 0.002835 age''' -6.99327 2.38615 -2.9308 0.003381 Factors in Final Model [1] age > validate(f, B=15) Iteration: 1 2 No convergence in 45 steps Warning in fit(x[xtrain, , drop = FALSE], y[train, , drop = FALSE], iter = i, : bj.fit failed 3 No convergence in 45 steps Warning in fit(x[xtrain, , drop = FALSE], y[train, , drop = FALSE], iter = i, : bj.fit failed 4 5 6 No convergence in 45 steps Warning in fit(x[xtrain, , drop = FALSE], y[train, , drop = FALSE], iter = i, : bj.fit failed 7 8 9 10 11 No convergence in 45 steps Warning in fit(x[xtrain, , drop = FALSE], y[train, , drop = FALSE], iter = i, : bj.fit failed 12 13 14 No convergence in 45 steps Warning in fit(x[xtrain, , drop = FALSE], y[train, , drop = FALSE], iter = i, : bj.fit failed 15 Divergence or singularity in 5 samples index.orig training test optimism index.corrected n Dxy 0.151 0.204 0.141 0.0627 0.0881 10 > plot(f, age=NA, hospital=NA) # needs datadist since no explicit age,hosp. > coef(f) # look at regression coefficients Intercept age age' age'' age''' hospital=b -1.0590 0.0554 -0.4562 4.6885 -6.6733 0.2024 > coef(psm(Surv(ftime, stroke) ~ rcs(age,5) + hospital, dist='lognormal')) (Intercept) age age' age'' age''' hospital=b -0.7259 0.0531 -0.4681 4.9181 -7.0953 0.2237 > # compare with coefficients from likelihood-based > # log-normal regression model > # use dist='gau' not under R > > r <- resid(f, 'censored.normalized') > survplot(survfit(r), conf='none') > # plot Kaplan-Meier estimate of > # survival function of standardized residuals > survplot(survfit(r ~ cut2(age, g=2)), conf='none') > # may desire both strata to be n(0,1) > options(datadist=NULL) > > > > cleanEx(); ..nameEx <- "bootcov" > > ### * bootcov > > flush(stderr()); flush(stdout()) > > ### Name: bootcov > ### Title: Bootstrap Covariance and Distribution for Regression > ### Coefficients > ### Aliases: bootcov bootplot bootplot.bootcov confplot confplot.bootcov > ### histdensity > ### Keywords: models regression htest methods hplot > > ### ** Examples > > set.seed(191) > x <- exp(rnorm(200)) > logit <- 1 + x/2 > y <- ifelse(runif(200) <= plogis(logit), 1, 0) > f <- lrm(y ~ pol(x,2), x=TRUE, y=TRUE) > g <- bootcov(f, B=50, pr=TRUE, coef.reps=TRUE) 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 > anova(g) # using bootstrap covariance estimates Wald Statistics Response: y Factor Chi-Square d.f. P x 1.96 2 0.376 Nonlinear 0.06 1 0.805 TOTAL 1.96 2 0.376 > fastbw(g) # using bootstrap covariance estimates Deleted Chi-Sq d.f. P Residual d.f. P AIC x 1.96 2 0.376 1.96 2 0.376 -2.04 Approximate Estimates after Deleting Factors Coef S.E. Wald Z P [1,] 1.305 0.1981 6.588 4.465e-11 Factors in Final Model None > beta <- g$boot.Coef[,1] > hist(beta, nclass=15) #look at normality of parameter estimates > qqnorm(beta) > # bootplot would be better than these last two commands > > # A dataset contains a variable number of observations per subject, > # and all observations are laid out in separate rows. The responses > # represent whether or not a given segment of the coronary arteries > # is occluded. Segments of arteries may not operate independently > # in the same patient. We assume a "working independence model" to > # get estimates of the coefficients, i.e., that estimates assuming > # independence are reasonably efficient. The job is then to get > # unbiased estimates of variances and covariances of these estimates. > > set.seed(1) > n.subjects <- 30 > ages <- rnorm(n.subjects, 50, 15) > sexes <- factor(sample(c('female','male'), n.subjects, TRUE)) > logit <- (ages-50)/5 > prob <- plogis(logit) # true prob not related to sex > id <- sample(1:n.subjects, 300, TRUE) # subjects sampled multiple times > table(table(id)) # frequencies of number of obs/subject 2 6 7 8 9 10 11 12 13 15 16 1 1 1 5 9 1 3 3 3 2 1 > age <- ages[id] > sex <- sexes[id] > # In truth, observations within subject are independent: > y <- ifelse(runif(300) <= prob[id], 1, 0) > f <- lrm(y ~ lsp(age,50)*sex, x=TRUE, y=TRUE) > g <- bootcov(f, id, B=50) # usually do B=200 or more singular information matrix in lrm.fit (rank= 5 ). Offending variable(s): age' * sex=male Warning in bootcov(f, id, B = 50) : fit failure in 1 resamples. Might try increasing maxit > diag(g$var)/diag(f$var) Intercept age age' sex=male age * sex=male 3.75 3.61 2.34 180.90 167.93 age' * sex=male 97.59 > # add ,group=w to re-sample from within each level of w > anova(g) # cluster-adjusted Wald statistics Wald Statistics Response: y Factor Chi-Square d.f. P age (Factor+Higher Order Factors) 46.50 4 <.0001 All Interactions 0.05 2 0.974 Nonlinear (Factor+Higher Order Factors) 0.28 2 0.871 sex (Factor+Higher Order Factors) 0.14 3 0.986 All Interactions 0.05 2 0.974 age * sex (Factor+Higher Order Factors) 0.05 2 0.974 Nonlinear 0.00 1 0.997 Nonlinear Interaction : f(A,B) vs. AB 0.00 1 0.997 TOTAL NONLINEAR 0.28 2 0.871 TOTAL NONLINEAR + INTERACTION 0.28 3 0.964 TOTAL 46.54 5 <.0001 > # fastbw(g) # cluster-adjusted backward elimination > plot(g, age=30:70, sex='female') # cluster-adjusted confidence bands > > # Get design effects based on inflation of the variances when compared > # with bootstrap estimates which ignore clustering > g2 <- bootcov(f, B=50) > diag(g$var)/diag(g2$var) Intercept age age' sex=male age * sex=male 0.00843 0.00955 0.01887 3.53230 3.56082 age' * sex=male 3.60403 > > # Get design effects based on pooled tests of factors in model > anova(g2)[,1] / anova(g)[,1] age (Factor+Higher Order Factors) 0.657 All Interactions 0.882 Nonlinear (Factor+Higher Order Factors) 0.499 sex (Factor+Higher Order Factors) 0.718 All Interactions 0.882 age * sex (Factor+Higher Order Factors) 0.882 Nonlinear 3.604 Nonlinear Interaction : f(A,B) vs. AB 3.604 TOTAL NONLINEAR 0.499 TOTAL NONLINEAR + INTERACTION 1.111 TOTAL 0.656 > > # Simulate binary data where there is a strong > # age x sex interaction with linear age effects > # for both sexes, but where not knowing that > # we fit a quadratic model. Use the bootstrap > # to get bootstrap distributions of various > # effects, and to get pointwise and simultaneous > # confidence limits > > set.seed(71) > n <- 500 > age <- rnorm(n, 50, 10) > sex <- factor(sample(c('female','male'), n, rep=TRUE)) > L <- ifelse(sex=='male', 0, .1*(age-50)) > y <- ifelse(runif(n)<=plogis(L), 1, 0) > > f <- lrm(y ~ sex*pol(age,2), x=TRUE, y=TRUE) > b <- bootcov(f, B=50, coef.reps=TRUE, pr=TRUE) # better: B=500 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 > > par(mfrow=c(2,3)) > # Assess normality of regression estimates > bootplot(b, which=1:6, what='qq') > # They appear somewhat non-normal > > # Plot histograms and estimated densities > # for 6 coefficients > w <- bootplot(b, which=1:6) > # Print bootstrap quantiles > w$quantiles 0.050 0.025 0.005 0.950 Intercept -20.45330 -21.24902 -22.28109 -6.477199 Coefficient of sex=male -1.03600 -2.74698 -4.20281 15.876238 Coefficient of age 0.14623 0.09910 0.07863 0.676707 Coefficient of age^2 -0.00522 -0.00545 -0.00713 -0.000411 Coefficient of sex=male * age -0.51609 -0.54544 -0.68573 0.091802 Coefficient of sex=male * age^2 -0.00152 -0.00249 -0.00293 0.004102 0.975 0.995 Intercept -4.653642 -3.97e+00 Coefficient of sex=male 16.409298 1.82e+01 Coefficient of age 0.685469 8.04e-01 Coefficient of age^2 -0.000109 -2.15e-05 Coefficient of sex=male * age 0.187578 2.35e-01 Coefficient of sex=male * age^2 0.004486 6.31e-03 > > # Estimate regression function for females > # for a sequence of ages > ages <- seq(25, 75, length=100) > label(ages) <- 'Age' > > # Plot fitted function and pointwise normal- > # theory confidence bands > par(mfrow=c(1,1)) > p <- plot(f, age=ages, sex='female') > w <- p$x.xbeta > # Save curve coordinates for later automatic > # labeling using labcurve in the Hmisc library > curves <- vector('list',8) > curves[[1]] <- list(x=w[,1],y=w[,3]) > curves[[2]] <- list(x=w[,1],y=w[,4]) > > # Add pointwise normal-distribution confidence > # bands using unconditional variance-covariance > # matrix from the 500 bootstrap reps > p <- plot(b, age=ages, sex='female', add=TRUE, lty=3) > w <- p$x.xbeta > curves[[3]] <- list(x=w[,1],y=w[,3]) > curves[[4]] <- list(x=w[,1],y=w[,4]) > > dframe <- expand.grid(sex='female', age=ages) > X <- predict(f, dframe, type='x') # Full design matrix > > # Add pointwise bootstrap nonparametric > # confidence limits > p <- confplot(b, X=X, against=ages, method='pointwise', + add=TRUE, lty.conf=4) > curves[[5]] <- list(x=ages, y=p$lower) > curves[[6]] <- list(x=ages, y=p$upper) > > # Add simultaneous bootstrap confidence band > p <- confplot(b, X=X, against=ages, add=TRUE, lty.conf=5) > curves[[7]] <- list(x=ages, y=p$lower) > curves[[8]] <- list(x=ages, y=p$upper) > lab <- c('a','a','b','b','c','c','d','d') > labcurve(curves, lab) > > # Now get bootstrap simultaneous confidence set for > # female:male odds ratios for a variety of ages > > dframe <- expand.grid(age=ages, sex=c('female','male')) > X <- predict(f, dframe, type='x') # design matrix > f.minus.m <- X[1:100,] - X[101:200,] > # First 100 rows are for females. By subtracting > # design matrices are able to get Xf*Beta - Xm*Beta > # = (Xf - Xm)*Beta > > confplot(b, X=f.minus.m, against=ages, + method='pointwise', ylab='F:M Log Odds Ratio') > confplot(b, X=f.minus.m, against=ages, + lty.conf=3, add=TRUE) > > # contrast.Design makes it easier to compute the design matrix for use > # in bootstrapping contrasts: > > f.minus.m <- contrast(f, list(sex='female',age=ages), + list(sex='male', age=ages))$X > confplot(b, X=f.minus.m) $fitted [,1] 1 -2.3810 2 -2.3232 3 -2.2657 4 -2.2087 5 -2.1520 6 -2.0956 7 -2.0396 8 -1.9840 9 -1.9288 10 -1.8740 11 -1.8195 12 -1.7654 13 -1.7116 14 -1.6583 15 -1.6053 16 -1.5527 17 -1.5004 18 -1.4485 19 -1.3970 20 -1.3459 21 -1.2951 22 -1.2447 23 -1.1947 24 -1.1451 25 -1.0958 26 -1.0469 27 -0.9984 28 -0.9502 29 -0.9024 30 -0.8550 31 -0.8080 32 -0.7613 33 -0.7150 34 -0.6691 35 -0.6235 36 -0.5783 37 -0.5335 38 -0.4891 39 -0.4450 40 -0.4013 41 -0.3580 42 -0.3150 43 -0.2724 44 -0.2302 45 -0.1884 46 -0.1469 47 -0.1058 48 -0.0651 49 -0.0247 50 0.0152 51 0.0549 52 0.0941 53 0.1330 54 0.1714 55 0.2096 56 0.2473 57 0.2847 58 0.3217 59 0.3583 60 0.3946 61 0.4305 62 0.4660 63 0.5011 64 0.5359 65 0.5703 66 0.6044 67 0.6380 68 0.6713 69 0.7042 70 0.7368 71 0.7689 72 0.8007 73 0.8322 74 0.8632 75 0.8939 76 0.9242 77 0.9542 78 0.9837 79 1.0129 80 1.0418 81 1.0702 82 1.0983 83 1.1260 84 1.1533 85 1.1803 86 1.2069 87 1.2331 88 1.2590 89 1.2845 90 1.3096 91 1.3343 92 1.3587 93 1.3827 94 1.4063 95 1.4295 96 1.4524 97 1.4749 98 1.4970 99 1.5188 100 1.5402 $upper 1 2 3 4 5 6 7 8 0.33841 0.29209 0.24728 0.20401 0.16226 0.12204 0.08334 0.04617 9 10 11 12 13 14 15 16 0.01052 -0.02360 -0.05620 -0.08727 -0.11681 -0.14483 -0.17132 -0.19629 17 18 19 20 21 22 23 24 -0.21973 -0.24164 -0.24470 -0.23552 -0.22566 -0.21512 -0.20389 -0.19198 25 26 27 28 29 30 31 32 -0.17939 -0.16611 -0.15214 -0.13750 -0.12217 -0.10615 -0.08945 -0.07207 33 34 35 36 37 38 39 40 -0.05400 -0.03525 -0.01581 0.00431 0.02512 0.04660 0.06878 0.11293 41 42 43 44 45 46 47 48 0.16856 0.22253 0.27487 0.32556 0.37461 0.42201 0.46778 0.51189 49 50 51 52 53 54 55 56 0.55437 0.59520 0.63438 0.67192 0.70782 0.74208 0.78140 0.82739 57 58 59 60 61 62 63 64 0.87249 0.91671 0.96004 1.00249 1.04406 1.08475 1.12455 1.16347 65 66 67 68 69 70 71 72 1.20151 1.23866 1.27493 1.31032 1.34483 1.37845 1.41119 1.44305 73 74 75 76 77 78 79 80 1.47402 1.50411 1.53332 1.56165 1.58909 1.61565 1.65541 1.69632 81 82 83 84 85 86 87 88 1.73659 1.77621 1.81520 1.85354 1.89125 1.92831 1.96474 2.01577 89 90 91 92 93 94 95 96 2.10218 2.19012 2.27958 2.37057 2.46309 2.55713 2.65270 2.74979 97 98 99 100 2.84841 2.94855 3.05022 3.15342 $lower 1 2 3 4 5 6 7 8 -5.915345 -5.752203 -5.590991 -5.431710 -5.274360 -5.118942 -4.965455 -4.813898 9 10 11 12 13 14 15 16 -4.664273 -4.516579 -4.370816 -4.226985 -4.085084 -3.945114 -3.807076 -3.670968 17 18 19 20 21 22 23 24 -3.536792 -3.404547 -3.274233 -3.145850 -3.019398 -2.894878 -2.772288 -2.651630 25 26 27 28 29 30 31 32 -2.532902 -2.416106 -2.301241 -2.188307 -2.077304 -1.968232 -1.861092 -1.755882 33 34 35 36 37 38 39 40 -1.652604 -1.551256 -1.451840 -1.354355 -1.258801 -1.165178 -1.073487 -0.985685 41 42 43 44 45 46 47 48 -0.942848 -0.899287 -0.855003 -0.809995 -0.764264 -0.717809 -0.670631 -0.622729 49 50 51 52 53 54 55 56 -0.574104 -0.524755 -0.474683 -0.423888 -0.372369 -0.320127 -0.269875 -0.230848 57 58 59 60 61 62 63 64 -0.191223 -0.150997 -0.110171 -0.068746 -0.026721 0.015903 0.059128 0.102952 65 66 67 68 69 70 71 72 0.147376 0.192400 0.238023 0.284247 0.321252 0.325329 0.327071 0.326479 73 74 75 76 77 78 79 80 0.323553 0.318292 0.310696 0.300766 0.288501 0.273902 0.256968 0.237700 81 82 83 84 85 86 87 88 0.216097 0.192159 0.165887 0.137281 0.106340 0.073064 0.037454 -0.000491 89 90 91 92 93 94 95 96 -0.040770 -0.083384 -0.128333 -0.175616 -0.225233 -0.277185 -0.331472 -0.388093 97 98 99 100 -0.447049 -0.508339 -0.571964 -0.637924 > > # For a quadratic binary logistic regression model use bootstrap > # bumping to estimate coefficients under a monotonicity constraint > set.seed(177) > n <- 400 > x <- runif(n) > logit <- 3*(x^2-1) > y <- rbinom(n, size=1, prob=plogis(logit)) > f <- lrm(y ~ pol(x,2), x=TRUE, y=TRUE) > k <- coef(f) > k Intercept x x^2 -3.78 1.41 2.49 > vertex <- -k[2]/(2*k[3]) > vertex x -0.283 > > # Outside [0,1] so fit satisfies monotonicity constraint within > # x in [0,1], i.e., original fit is the constrained MLE > > g <- bootcov(f, B=50, coef.reps=TRUE) > bootcoef <- g$boot.Coef # 100x3 matrix > vertex <- -bootcoef[,2]/(2*bootcoef[,3]) > table(cut2(vertex, c(0,1))) [-6.12, 0.00) [ 0.00, 1.00) [ 1.00, 5.58] 29 14 7 > mono <- !(vertex >= 0 & vertex <= 1) > mean(mono) # estimate of Prob{monotonicity in [0,1]} [1] 0.72 > > var(bootcoef) # var-cov matrix for unconstrained estimates Intercept x x^2 Intercept 0.948 -2.81 1.90 x -2.808 9.78 -7.32 x^2 1.900 -7.32 5.83 > var(bootcoef[mono,]) # for constrained estimates Intercept x x^2 Intercept 0.963 -2.60 1.64 x -2.600 8.38 -5.97 x^2 1.637 -5.97 4.62 > > # Find second-best vector of coefficient estimates, i.e., best > # from among bootstrap estimates > g$boot.Coef[order(g$boot.loglik[-length(g$boot.loglik)])[1],] Intercept x x^2 -3.565 0.827 2.990 > # Note closeness to MLE > > > > graphics::par(get("par.postscript", env = .CheckExEnv)) > cleanEx(); ..nameEx <- "calibrate" > > ### * calibrate > > flush(stderr()); flush(stdout()) > > ### Name: calibrate > ### Title: Resampling Model Calibration > ### Aliases: calibrate calibrate.default calibrate.cph calibrate.psm > ### print.calibrate print.calibrate.default plot.calibrate > ### plot.calibrate.default > ### Keywords: methods models regression survival hplot > > ### ** Examples > > set.seed(1) > d.time <- rexp(200) > x1 <- runif(200) > x2 <- factor(sample(c('a','b','c'),200,TRUE)) > f <- cph(Surv(d.time) ~ pol(x1,2)*x2, x=TRUE, y=TRUE, surv=TRUE, time.inc=2) > #or f <- psm(S ~ ...) > cal <- calibrate(f, u=2, m=50, B=20) # usually B=200 or 300 Using Cox survival estimates at 2 Days Averaging 2 repetitions of B= 10 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations x n events KM std.err [1,] 0.06433 50 50 0.06 0.1990 [2,] 0.11124 50 50 0.16 0.1768 [3,] 0.14900 50 50 0.10 0.1843 [4,] 0.20614 50 50 0.20 0.1757 Iteration: 1 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 2 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 3 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 4 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 5 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 6 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 7 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 8 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 9 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 10 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations x n events KM std.err [1,] 0.06433 50 50 0.06 0.1990 [2,] 0.11124 50 50 0.16 0.1768 [3,] 0.14900 50 50 0.10 0.1843 [4,] 0.20614 50 50 0.20 0.1757 Iteration: 1 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 2 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 3 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 4 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 5 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 6 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 7 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 8 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 9 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations 10 Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Warning in groupkm(cox, Surv(y[, 1], y[, 2]), u = u, cuts = orig.cuts) : one interval had < 2 observations Mean over 2 overall replications mean.optimism mean.corrected n [1,] -0.01526 0.0109 20 [2,] 0.00094 0.0478 20 [3,] -0.00463 -0.0444 20 [4,] 0.04720 -0.0533 20 > plot(cal) > > y <- sample(0:2, 200, TRUE) > x1 <- runif(200) > x2 <- runif(200) > x3 <- runif(200) > x4 <- runif(200) > f <- lrm(y ~ x1+x2+x3*x4, x=TRUE, y=TRUE) > cal <- calibrate(f, kint=2, predy=seq(.2,.8,length=60), + group=y) Iteration: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 > # group= does k-sample validation: make resamples have same > # numbers of subjects in each level of y as original sample > > plot(cal) n=200 Mean absolute error=0.0330 Mean squared error=0.00212 0.9 Quantile of absolute error=0.0446 > #See the example for the validate function for a method of validating > #continuation ratio ordinal logistic models. You can do the same > #thing for calibrate > > > > cleanEx(); ..nameEx <- "contrast" > > ### * contrast > > flush(stderr()); flush(stdout()) > > ### Name: contrast.Design > ### Title: General Contrasts of Regression Coefficients > ### Aliases: contrast contrast.Design print.contrast.Design > ### Keywords: htest models regression > > ### ** Examples > > set.seed(1) > age <- rnorm(200,40,12) > sex <- factor(sample(c('female','male'),200,TRUE)) > logit <- (sex=='male') + (age-40)/5 > y <- ifelse(runif(200) <= plogis(logit), 1, 0) > f <- lrm(y ~ pol(age,2)*sex) > # Compare a 30 year old female to a 40 year old male > # (with or without age x sex interaction in the model) > contrast(f, list(sex='female', age=30), list(sex='male', age=40)) Contrast S.E. Lower Upper Z Pr(>|z|) 1 -3.96 0.854 -5.63 -2.28 -4.64 0 > > # For a model containing two treatments, centers, and treatment > # x center interaction, get 0.95 confidence intervals separately > # by cente > center <- factor(sample(letters[1:8],500,TRUE)) > treat <- factor(sample(c('a','b'), 500,TRUE)) > y <- 8*(treat=='b') + rnorm(500,100,20) > f <- ols(y ~ treat*center) > > lc <- levels(center) > contrast(f, list(treat='b', center=lc), + list(treat='a', center=lc)) center Contrast S.E. Lower Upper t Pr(>|t|) a -2.73 4.70 -11.974 6.51 -0.58 0.5616 b 4.29 5.28 -6.091 14.67 0.81 0.4173 c 11.45 4.86 1.901 21.01 2.36 0.0189 d 10.52 4.87 0.948 20.09 2.16 0.0313 e 24.08 6.10 12.097 36.07 3.95 0.0001 f 9.09 5.14 -1.012 19.18 1.77 0.0777 g 11.91 5.39 1.325 22.50 2.21 0.0275 h -4.47 4.86 -14.032 5.08 -0.92 0.3580 Error d.f.= 484 > > # Get 'Type III' contrast: average b - a treatment effect over > # centers, weighting centers equally (which is almost always > # an unreasonable thing to do) > contrast(f, list(treat='b', center=lc), + list(treat='a', center=lc), + type='average') Contrast S.E. Lower Upper t Pr(>|t|) 1 8.02 1.83 4.43 11.6 4.39 0 Error d.f.= 484 > > # Get 'Type II' contrast, weighting centers by the number of > # subjects per center. Print the design contrast matrix used. > k <- contrast(f, list(treat='b', center=lc), + list(treat='a', center=lc), + type='average', weights=table(center)) > print(k, X=TRUE) Contrast S.E. Lower Upper t Pr(>|t|) 1 7.1 1.81 3.55 10.6 3.93 1e-04 Error d.f.= 484 Design Matrix for Contrasts Intercept treat=b center=b center=c center=d center=e center=f center=g 1 0 1 0 0 0 0 0 0 center=h treat=b * center=b treat=b * center=c treat=b * center=d 1 0 0.12 0.136 0.136 treat=b * center=e treat=b * center=f treat=b * center=g treat=b * center=h 1 0.09 0.122 0.112 0.136 > # Note: If other variables had interacted with either treat > # or center, we may want to list settings for these variables > # inside the list()'s, so as to not use default settings > > # For a 4-treatment study, get all comparisons with treatment 'a' > treat <- factor(sample(c('a','b','c','d'), 500,TRUE)) > y <- 8*(treat=='b') + rnorm(500,100,20) > dd <- datadist(treat,center); options(datadist='dd') > f <- ols(y ~ treat*center) > lt <- levels(treat) > contrast(f, list(treat=lt[-1]), + list(treat=lt[ 1]), + cnames=paste(lt[-1],lt[1],sep=':'), conf.int=1-.05/3) Contrast S.E. Lower Upper t Pr(>|t|) b:a 7.44 7.57 -10.8 25.62 0.98 0.326 c:a -7.19 7.10 -24.3 9.87 -1.01 0.312 d:a -5.67 6.51 -21.3 9.96 -0.87 0.384 Error d.f.= 468 > > # Compare each treatment with average of all others > for(i in 1:length(lt)) { + cat('Comparing with',lt[i],'\n\n') + print(contrast(f, list(treat=lt[-i]), + list(treat=lt[ i]), type='average')) + } Comparing with a Contrast S.E. Lower Upper t Pr(>|t|) 1 -1.81 5.52 -12.7 9.04 -0.33 0.743 Error d.f.= 468 Comparing with b Contrast S.E. Lower Upper t Pr(>|t|) 1 -11.7 6.68 -24.8 1.40 -1.76 0.0799 Error d.f.= 468 Comparing with c Contrast S.E. Lower Upper t Pr(>|t|) 1 7.78 6.2 -4.42 20.0 1.25 0.211 Error d.f.= 468 Comparing with d Contrast S.E. Lower Upper t Pr(>|t|) 1 5.75 5.6 -5.24 16.7 1.03 0.304 Error d.f.= 468 > options(datadist=NULL) > > # Six ways to get the same thing, for a variable that > # appears linearly in a model and does not interact with > # any other variables. We estimate the change in y per > # unit change in a predictor x1. Methods 4, 5 also > # provide confidence limits. Method 6 computes nonparametric > # bootstrap confidence limits. Methods 2-6 can work > # for models that are nonlinear or non-additive in x1. > # For that case more care is needed in choice of settings > # for x1 and the variables that interact with x1. > > ## Not run: > ##D coef(fit)['x1'] # method 1 > ##D diff(predict(fit, gendata(x1=c(0,1)))) # method 2 > ##D g <- Function(fit) # method 3 > ##D g(x1=1) - g(x1=0) > ##D summary(fit, x1=c(0,1)) # method 4 > ##D k <- contrast(fit, list(x1=1), list(x1=0)) # method 5 > ##D print(k, X=TRUE) > ##D fit <- update(fit, x=TRUE, y=TRUE) # method 6 > ##D b <- bootcov(fit, B=500, coef.reps=TRUE) > ##D bootplot(b, X=k$X) # bootstrap distribution and CL > ##D > ##D # In a model containing age, race, and sex, > ##D # compute an estimate of the mean response for a > ##D # 50 year old male, averaged over the races using > ##D # observed frequencies for the races as weights > ##D > ##D f <- ols(y ~ age + race + sex) > ##D contrast(f, list(age=50, sex='male', race=levels(race)), > ##D type='average', weights=table(race)) > ## End(Not run) > > # Plot the treatment effect (drug - placebo) as a function of age > # and sex in a model in which age nonlinearly interacts with treatment > # for females only > set.seed(1) > n <- 800 > treat <- factor(sample(c('drug','placebo'), n,TRUE)) > sex <- factor(sample(c('female','male'), n,TRUE)) > age <- rnorm(n, 50, 10) > y <- .05*age + (sex=='female')*(treat=='drug')*.05*abs(age-50) + rnorm(n) > f <- ols(y ~ rcs(age,4)*treat*sex) > d <- datadist(age, treat, sex); options(datadist='d') > # show separate estimates by treatment and sex > plot(f, age=NA, treat=NA, sex='female') > plot(f, age=NA, treat=NA, sex='male') > ages <- seq(35,65,by=5); sexes <- c('female','male') > w <- contrast(f, list(treat='drug', age=ages, sex=sexes), + list(treat='placebo', age=ages, sex=sexes)) > xYplot(Cbind(Contrast, Lower, Upper) ~ age | sex, data=w, + ylab='Drug - Placebo') Loading required package: grid Loading required package: lattice > xYplot(Cbind(Contrast, Lower, Upper) ~ age, groups=sex, data=w, + ylab='Drug - Placebo', method='alt bars') > options(datadist=NULL) > > > > cleanEx(); ..nameEx <- "cph" > > ### * cph > > flush(stderr()); flush(stdout()) > > ### Name: cph > ### Title: Cox Proportional Hazards Model and Extensions > ### Aliases: cph Survival.cph Quantile.cph Mean.cph > ### Keywords: survival models nonparametric > > ### ** Examples > > # Simulate data from a population model in which the log hazard > # function is linear in age and there is no age x sex interaction > n <- 1000 > set.seed(731) > age <- 50 + 12*rnorm(n) > label(age) <- "Age" > sex <- factor(sample(c('Male','Female'), n, + rep=TRUE, prob=c(.6, .4))) > cens <- 15*runif(n) > h <- .02*exp(.04*(age-50)+.8*(sex=='Female')) > dt <- -log(runif(n))/h > label(dt) <- 'Follow-up Time' > e <- ifelse(dt <= cens,1,0) > dt <- pmin(dt, cens) > units(dt) <- "Year" > dd <- datadist(age, sex) > options(datadist='dd') > Srv <- Surv(dt,e) > > f <- cph(Srv ~ rcs(age,4) + sex, x=TRUE, y=TRUE) > cox.zph(f, "rank") # tests of PH rho chisq p age 0.0374 0.177 0.6741 age' -0.0351 0.171 0.6792 age'' 0.0268 0.101 0.7504 sex=Male 0.1486 3.977 0.0461 GLOBAL NA 4.867 0.3013 > anova(f) Wald Statistics Response: Srv Factor Chi-Square d.f. P age 57.75 3 <.0001 Nonlinear 8.17 2 0.0168 sex 18.75 1 <.0001 TOTAL 75.63 4 <.0001 > plot(f, age=NA, sex=NA) # plot age effect, 2 curves for 2 sexes > survplot(f, sex=NA) # time on x-axis, curves for x2 > res <- resid(f, "scaledsch") > time <- as.numeric(dimnames(res)[[1]]) > z <- loess(res[,4] ~ time, span=0.50) # residuals for sex > if(.R.) plot(time, fitted(z)) else + plot(z, coverage=0.95, confidence=7, xlab="t", + ylab="Scaled Schoenfeld Residual",ylim=c(-3,5)) > lines(supsmu(time, res[,4]),lty=2) > plot(cox.zph(f,"identity")) #Easier approach for last 6 lines > # latex(f) > > f <- cph(Srv ~ age + strat(sex), surv=TRUE) > g <- Survival(f) # g is a function > g(seq(.1,1,by=.1), stratum="sex=Male", type="poly") #could use stratum=2 [1] 0.999 0.996 0.994 0.993 0.992 0.992 0.992 0.992 0.992 0.991 > med <- Quantile(f) > plot(f, age=NA, fun=function(x) med(lp=x)) #plot median survival > > # g <- cph(Surv(hospital.charges) ~ age, surv=TRUE) > # Cox model very useful for analyzing highly skewed data, censored or not > # m <- Mean(g) > # m(0) # Predicted mean charge for reference age > > #Fit a time-dependent covariable representing the instantaneous effect > #of an intervening non-fatal event > rm(age) > set.seed(121) > dframe <- data.frame(failure.time=1:10, event=rep(0:1,5), + ie.time=c(NA,1.5,2.5,NA,3,4,NA,5,5,5), + age=sample(40:80,10,rep=TRUE)) > z <- ie.setup(dframe$failure.time, dframe$event, dframe$ie.time) > S <- z$S > ie.status <- z$ie.status > attach(dframe[z$subs,]) # replicates all variables > > f <- cph(S ~ age + ie.status, x=TRUE, y=TRUE) > #Must use x=TRUE,y=TRUE to get survival curves with time-dep. covariables > > #Get estimated survival curve for a 50-year old who has an intervening > #non-fatal event at 5 days > new <- data.frame(S=Surv(c(0,5), c(5,999), c(FALSE,FALSE)), age=rep(50,2), + ie.status=c(0,1)) > g <- survfit(f, new) > plot(c(0,g$time), c(1,g$surv[,2]), type='s', + xlab='Days', ylab='Survival Prob.') > # Not certain about what columns represent in g$surv for survival5 > # but appears to be for different ie.status > #or: > #g <- survest(f, new) > #plot(g$time, g$surv, type='s', xlab='Days', ylab='Survival Prob.') > > #Compare with estimates when there is no intervening event > new2 <- data.frame(S=Surv(c(0,5), c(5, 999), c(FALSE,FALSE)), age=rep(50,2), + ie.status=c(0,0)) > g2 <- survfit(f, new2) > lines(c(0,g2$time), c(1,g2$surv[,2]), type='s', lty=2) > #or: > #g2 <- survest(f, new2) > #lines(g2$time, g2$surv, type='s', lty=2) > detach("dframe[z$subs, ]") > options(datadist=NULL) > > > > cleanEx(); ..nameEx <- "cr.setup" > > ### * cr.setup > > flush(stderr()); flush(stdout()) > > ### Name: cr.setup > ### Title: Continuation Ratio Ordinal Logistic Setup > ### Aliases: cr.setup > ### Keywords: category models regression > > ### ** Examples > > y <- c(NA, 10, 21, 32, 32) > cr.setup(y) $y [1] NA 1 0 1 0 0 0 0 $cohort [1] all all y>=21 all y>=21 all y>=21 Levels: all y>=21 $subs [1] 1 2 3 3 4 4 5 5 $reps [1] 1 1 2 2 2 > > set.seed(171) > y <- sample(0:2, 100, rep=TRUE) > sex <- sample(c("f","m"),100,rep=TRUE) > sex <- factor(sex) > table(sex, y) y sex 0 1 2 f 23 15 13 m 12 23 14 > options(digits=5) > tapply(y==0, sex, mean) f m 0.45098 0.24490 > tapply(y==1, sex, mean) f m 0.29412 0.46939 > tapply(y==2, sex, mean) f m 0.25490 0.28571 > cohort <- y>=1 > tapply(y[cohort]==1, sex[cohort], mean) f m 0.53571 0.62162 > > u <- cr.setup(y) > Y <- u$y > cohort <- u$cohort > sex <- sex[u$subs] > > lrm(Y ~ cohort + sex) Logistic Regression Model lrm(formula = Y ~ cohort + sex) Frequencies of Responses 0 1 92 73 Obs Max Deriv Model L.R. d.f. P C Dxy 165 5e-08 10.32 2 0.0057 0.645 0.291 Gamma Tau-a R2 Brier 0.382 0.144 0.081 0.232 Coef S.E. Wald Z P Intercept -0.4299 0.2586 -1.66 0.0964 cohort=y>=1 1.0018 0.3317 3.02 0.0025 sex=m -0.3983 0.3263 -1.22 0.2223 > > > f <- lrm(Y ~ cohort*sex) # saturated model - has to fit all data cells > f Logistic Regression Model lrm(formula = Y ~ cohort * sex) Frequencies of Responses 0 1 92 73 Obs Max Deriv Model L.R. d.f. P C Dxy 165 3e-12 14.03 3 0.0029 0.659 0.317 Gamma Tau-a R2 Brier 0.417 0.157 0.109 0.226 Coef S.E. Wald Z P Intercept -0.1967 0.2814 -0.70 0.4845 cohort=y>=1 0.3398 0.4720 0.72 0.4716 sex=m -0.9293 0.4354 -2.13 0.0328 cohort=y>=1 * sex=m 1.2826 0.6694 1.92 0.0553 > > # In S-Plus: > #Prob(y=0|female): > # plogis(-.50078) > #Prob(y=0|male): > # plogis(-.50078+.11301) > #Prob(y=1|y>=1, female): > plogis(-.50078+.31845) [1] 0.45454 > #Prob(y=1|y>=1, male): > plogis(-.50078+.31845+.11301-.07379) [1] 0.46428 > > combinations <- expand.grid(cohort=levels(cohort), sex=levels(sex)) > combinations cohort sex 1 all f 2 y>=1 f 3 all m 4 y>=1 m > p <- predict(f, combinations, type="fitted") > p 1 2 3 4 0.45098 0.53571 0.24490 0.62162 > p0 <- p[c(1,3)] > p1 <- p[c(2,4)] > p1.unconditional <- (1 - p0) *p1 > p1.unconditional 1 3 0.29412 0.46939 > p2.unconditional <- 1 - p0 - p1.unconditional > p2.unconditional 1 3 0.25490 0.28571 > > ## Not run: > ##D dd <- datadist(inputdata) # do this on non-replicated data > ##D options(datadist='dd') > ##D pain.severity <- inputdata$pain.severity > ##D u <- cr.setup(pain.severity) > ##D # inputdata frame has age, sex with pain.severity > ##D attach(inputdata[u$subs,]) # replicate age, sex > ##D # If age, sex already available, could do age <- age[u$subs] etc., or > ##D # age <- rep(age, u$reps), etc. > ##D y <- u$y > ##D cohort <- u$cohort > ##D dd <- datadist(dd, cohort) # add to dd > ##D f <- lrm(y ~ cohort + age*sex) # ordinary cont. ratio model > ##D g <- lrm(y ~ cohort*sex + age, x=TRUE,y=TRUE) # allow unequal slopes for > ##D # sex across cutoffs > ##D cal <- calibrate(g, cluster=u$subs, subset=cohort=='all') > ##D # subs makes bootstrap sample the correct units, subset causes > ##D # Predicted Prob(pain.severity=0) to be checked for calibration > ## End(Not run) > > > > cleanEx(); ..nameEx <- "datadist" > > ### * datadist > > flush(stderr()); flush(stdout()) > > ### Name: datadist > ### Title: Distribution Summaries for Predictor Variables > ### Aliases: datadist print.datadist > ### Keywords: models nonparametric regression > > ### ** Examples > > ## Not run: > ##D d <- datadist(data=1) # use all variables in search pos. 1 > ##D d <- datadist(x1, x2, x3) > ##D page(d) # if your options(pager) leaves up a pop-up > ##D # window, this is a useful guide in analyses > ##D d <- datadist(data=2) # all variables in search pos. 2 > ##D d <- datadist(data=my.data.frame) > ##D d <- datadist(my.data.frame) # same as previous. Run for all potential vars. > ##D d <- datadist(x2, x3, data=my.data.frame) # combine variables > ##D d <- datadist(x2, x3, q.effect=c(.1,.9), q.display=c(0,1)) > ##D # uses inter-decile range odds ratios, > ##D # total range of variables for regression function plots > ##D d <- datadist(d, z) # add a new variable to an existing datadist > ##D options(datadist="d") #often a good idea, to store info with fit > ##D f <- ols(y ~ x1*x2*x3) > ##D > ##D options(datadist=NULL) #default at start of session > ##D f <- ols(y ~ x1*x2) > ##D d <- datadist(f) #info not stored in `f' > ##D d$limits["Adjust to","x1"] <- .5 #reset adjustment level to .5 > ##D options(datadist="d") > ##D > ##D f <- lrm(y ~ x1*x2, data=mydata) > ##D d <- datadist(f, data=mydata) > ##D options(datadist="d") > ##D > ##D f <- lrm(y ~ x1*x2) #datadist not used - specify all values for > ##D summary(f, x1=c(200,500,800), x2=c(1,3,5)) # obtaining predictions > ##D plot(f, x1=200:800, x2=3) > ##D > ##D # Change reference value to get a relative odds plot for a logistic model > ##D d$limits$age[2] <- 30 # make 30 the reference value for age > ##D # Could also do: d$limits["Adjust to","age"] <- 30 > ##D fit <- update(fit) # make new reference value take effect > ##D plot(fit, age=NA, ref.zer