This vignette demonstrates how to use the {samplezoo} package to generate datasets of varying sizes (small, medium, and large) with variables from multiple probability distributions.
Each dataset offers:
- Variables (columns) drawn from common probability distributions such as Normal, Binomial, Poisson, and others.
- An adjustable sample size ("small", "medium", or "large") to suit your needs.
data_small <- samplezoo("small")
head(data_small)
#> norm norm2 norm3 binom neg pois exp unif beta
#> 1 24.01904 57.10210 33.58085 0 5 5 8.109759 0.2746459 0.06072892
#> 2 52.61473 77.85494 52.61809 1 1 2 5.791296 0.2964852 0.58915675
#> 3 22.85430 50.32991 36.39559 0 3 5 21.151451 0.5230089 0.23070819
#> 4 37.74089 40.54657 50.42342 1 2 5 5.652803 0.1250447 0.34704880
#> 5 57.38885 78.49564 19.06029 0 0 5 10.482339 0.8279693 0.07115475
#> 6 40.53354 80.83017 65.38507 0 0 1 4.198844 0.4381156 0.15960607
#> gamma chisq t_dist
#> 1 2.543362 0.3497569 0.4721417
#> 2 5.347351 0.8319221 1.6167839
#> 3 4.450899 1.8383299 0.3548561
#> 4 2.412426 2.3599334 -0.1526986
#> 5 1.060736 0.8434991 0.5108567
#> 6 2.244033 2.2491525 0.4892787
data_medium <- samplezoo("medium")
head(data_medium)
#> norm norm2 norm3 binom neg pois exp unif beta
#> 1 24.24724 73.29708 22.214310 0 1 5 11.0050841 0.65784701 0.09821555
#> 2 50.45329 53.61844 47.973893 0 2 3 4.9172695 0.27143713 0.06785489
#> 3 35.32228 59.49156 33.161308 0 1 3 5.2108455 0.00046993 0.64303826
#> 4 75.23637 62.10179 60.370066 0 0 5 15.5838949 0.99051073 0.12231201
#> 5 62.44062 63.27317 41.763082 0 1 8 2.5608562 0.69043010 0.31760048
#> 6 44.17461 46.24991 6.367756 0 0 4 0.1999096 0.38624775 0.44643234
#> gamma chisq t_dist
#> 1 1.987381 3.833893 0.7609412
#> 2 9.848751 6.164145 -0.5770737
#> 3 5.344355 3.140235 1.9390034
#> 4 0.941446 8.962906 -0.5503737
#> 5 1.299360 5.785310 0.3709101
#> 6 1.079053 10.883484 -0.6275835
data_large <- samplezoo("large")
head(data_large)
#> norm norm2 norm3 binom neg pois exp unif beta
#> 1 50.95933 62.03111 33.57413 0 3 4 17.572730 0.71401790 0.4471074
#> 2 49.08107 67.50409 14.55490 0 0 5 1.043247 0.78324006 0.2566909
#> 3 44.92104 61.85103 46.24320 0 1 4 8.063268 0.03044348 0.2062338
#> 4 53.07700 59.65934 30.80349 1 1 0 9.497468 0.56686292 0.2061777
#> 5 60.49422 50.05604 65.94409 0 0 3 23.765609 0.64508146 0.3071335
#> 6 48.76375 53.25095 28.84037 0 1 4 11.256821 0.98811979 0.1356730
#> gamma chisq t_dist
#> 1 4.754528 13.016135 0.61540893
#> 2 3.528773 12.118134 -0.03291589
#> 3 4.090767 15.699932 0.66201871
#> 4 6.468221 6.432281 -0.51771387
#> 5 1.070185 7.569166 0.68421285
#> 6 2.163831 11.673227 -1.77789271
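To make your results reproducible, call set.seed() before generating random data. The same seed always produces the same dataset, while a different seed (for example, 123 versus 456 below) produces a different, but equally reproducible, dataset.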
set.seed(123)
data_large <- samplezoo("large")
head(data_large)
#> norm norm2 norm3 binom neg pois exp unif beta
#> 1 41.59287 83.70725 23.274065 0 1 6 6.628373 0.5468223 0.08294255
#> 2 46.54734 58.33188 35.588540 0 0 5 21.305366 0.3900809 0.63544684
#> 3 73.38062 69.26961 -2.070295 0 2 4 0.189645 0.7262119 0.11520674
#> 4 51.05763 54.31848 6.643849 0 2 2 8.479098 0.5101462 0.38184206
#> 5 51.93932 62.25090 18.040743 0 0 2 11.885521 0.2964126 0.17196046
#> 6 75.72597 71.31986 6.687576 0 1 4 6.363993 0.1442317 0.35908460
#> gamma chisq t_dist
#> 1 6.9893762 10.286282 -0.3814568
#> 2 5.4087626 6.519658 -2.3409216
#> 3 1.2587867 8.011417 -0.4744159
#> 4 0.9871787 14.780626 0.4292511
#> 5 2.4021943 6.799788 -0.6692669
#> 6 4.2109032 17.858701 -0.3370763
set.seed(456)
data_large <- samplezoo("large")
head(data_large)
#> norm norm2 norm3 binom neg pois exp unif beta
#> 1 29.84718 68.13494 7.9885694 0 0 5 3.4417303 0.8866347 0.05413307
#> 2 59.32663 52.32066 21.2526086 0 3 3 0.8114356 0.7976466 0.07195440
#> 3 62.01312 62.47569 38.4789563 0 2 6 46.8038907 0.6469920 0.22555129
#> 4 29.16661 53.51086 -0.8656269 0 1 5 11.6955326 0.2036753 0.71455809
#> 5 39.28465 47.19406 47.7819258 1 1 1 0.3535625 0.3653401 0.34619912
#> 6 45.13908 63.33566 53.3620528 1 1 2 4.5592136 0.7628573 0.25880522
#> gamma chisq t_dist
#> 1 6.7914120 4.464348 -1.0150596
#> 2 3.0132520 8.062120 0.3262369
#> 3 4.7360954 10.969593 1.5141157
#> 4 5.1235878 6.249247 0.6432708
#> 5 6.6851637 4.358815 0.2025742
#> 6 0.3903841 20.019575 1.6257109