This vignette demonstrates how to use the {samplezoo} package to generate datasets of varying sizes (small, medium, and large) with variables from multiple probability distributions.
Each dataset contains:
Variables/columns from common distributions such as Normal, Binomial, Poisson, and others.
A sample size set by the option you pass to samplezoo(): "small", "medium", or "large".
data_small <- samplezoo("small")
head(data_small)
#> norm norm_2 norm_3 bern neg pois exp unif beta
#> 1 63.17078 66.45916 53.516804 1 0 4 6.481463 0.2410419 0.15525688
#> 2 35.45895 34.89860 34.605709 0 4 1 23.908570 0.3030301 0.04259387
#> 3 54.05800 72.73750 9.688985 1 0 4 14.056282 0.8598876 0.13860002
#> 4 53.16666 60.27601 47.394944 1 1 5 6.539186 0.1263046 0.63459452
#> 5 62.78092 59.92270 23.784479 0 0 5 33.971733 0.1820170 0.39191500
#> 6 17.86968 61.41629 27.855150 1 4 3 6.772410 0.5542087 0.19612948
#> gamma chi_sq t_dist f_dist
#> 1 5.960212 20.282286 -0.45809852 1.2033737
#> 2 1.221299 7.720472 0.67804593 0.4855198
#> 3 1.564677 6.296175 -0.02203252 0.8528776
#> 4 4.246349 8.944249 0.50670311 0.3362610
#> 5 5.091944 7.569064 -0.41179705 1.8880333
#> 6 4.362519 13.150804 -0.91484289 6.6996572
data_medium <- samplezoo("medium")
head(data_medium)
#> norm norm_2 norm_3 bern neg pois exp unif beta
#> 1 53.81610 62.52185 40.88840 1 2 1 1.347299418 0.7717186 0.17020415
#> 2 32.23838 69.00252 42.92283 1 0 3 7.564968284 0.1511976 0.04014913
#> 3 86.29126 75.02940 16.89684 0 1 2 5.029708603 0.2068348 0.26510409
#> 4 69.56563 43.08343 37.64668 1 0 4 0.005269265 0.8637200 0.03531230
#> 5 60.89404 62.91326 72.45100 1 1 2 16.713277119 0.1171863 0.38590862
#> 6 66.71723 47.28574 42.21524 0 1 2 5.709692985 0.7065846 0.27864365
#> gamma chi_sq t_dist f_dist
#> 1 1.2797782 7.563002 0.7040720 1.951610
#> 2 0.9491639 10.666864 -1.2038265 1.575964
#> 3 6.4444169 10.111492 0.2744420 1.406042
#> 4 0.9429589 3.229373 -1.5050957 1.224418
#> 5 7.4719117 12.628434 -0.2515078 2.970643
#> 6 14.4938132 10.112043 0.8163137 0.646780
data_large <- samplezoo("large")
head(data_large)
#> norm norm_2 norm_3 bern neg pois exp unif beta
#> 1 69.06926 54.78800 42.33331 1 2 4 49.0787612 0.29438380 0.1906598
#> 2 65.79423 60.23637 24.51366 0 0 6 23.1443750 0.90527332 0.1019475
#> 3 30.29612 51.96121 49.13541 0 1 3 0.3826007 0.89257582 0.2344017
#> 4 25.24485 72.24773 38.74936 0 3 1 6.6637183 0.04771752 0.4608193
#> 5 47.84531 58.30289 69.35160 1 0 2 3.6652096 0.80507796 0.3263387
#> 6 69.95993 60.63180 28.14861 0 0 7 0.5670046 0.99324266 0.2790363
#> gamma chi_sq t_dist f_dist
#> 1 2.087312 20.078999 0.3693474 0.5656647
#> 2 5.972130 11.447609 0.9822478 1.0532589
#> 3 3.619075 8.040936 0.6652285 0.7495735
#> 4 5.122370 14.457524 -0.8462000 1.8472764
#> 5 4.062681 14.823928 -0.7654113 1.8105827
#> 6 1.107407 9.837824 -0.1848859 0.2006835
To make your results reproducible, call set.seed() before generating random data: using the same seed always produces the same dataset, while changing the seed produces a different dataset, giving you controlled variation.
Reproducibility
set.seed(123)
data_large <- samplezoo("large")
head(data_large)
#> norm norm_2 norm_3 bern neg pois exp unif beta
#> 1 41.59287 83.70725 23.274065 0 1 6 6.628373 0.5468223 0.08294255
#> 2 46.54734 58.33188 35.588540 1 0 5 21.305366 0.3900809 0.63544684
#> 3 73.38062 69.26961 -2.070295 1 2 4 0.189645 0.7262119 0.11520674
#> 4 51.05763 54.31848 6.643849 0 2 2 8.479098 0.5101462 0.38184206
#> 5 51.93932 62.25090 18.040743 0 0 2 11.885521 0.2964126 0.17196046
#> 6 75.72597 71.31986 6.687576 0 1 4 6.363993 0.1442317 0.35908460
#> gamma chi_sq t_dist f_dist
#> 1 6.9893762 10.286282 -0.3814568 0.7264343
#> 2 5.4087626 6.519658 -2.3409216 0.9698166
#> 3 1.2587867 8.011417 -0.4744159 0.4329175
#> 4 0.9871787 14.780626 0.4292511 1.0227474
#> 5 2.4021943 6.799788 -0.6692669 2.7446729
#> 6 4.2109032 17.858701 -0.3370763 1.3993853
set.seed(123)
data_large <- samplezoo("large")
head(data_large)
#> norm norm_2 norm_3 bern neg pois exp unif beta
#> 1 41.59287 83.70725 23.274065 0 1 6 6.628373 0.5468223 0.08294255
#> 2 46.54734 58.33188 35.588540 1 0 5 21.305366 0.3900809 0.63544684
#> 3 73.38062 69.26961 -2.070295 1 2 4 0.189645 0.7262119 0.11520674
#> 4 51.05763 54.31848 6.643849 0 2 2 8.479098 0.5101462 0.38184206
#> 5 51.93932 62.25090 18.040743 0 0 2 11.885521 0.2964126 0.17196046
#> 6 75.72597 71.31986 6.687576 0 1 4 6.363993 0.1442317 0.35908460
#> gamma chi_sq t_dist f_dist
#> 1 6.9893762 10.286282 -0.3814568 0.7264343
#> 2 5.4087626 6.519658 -2.3409216 0.9698166
#> 3 1.2587867 8.011417 -0.4744159 0.4329175
#> 4 0.9871787 14.780626 0.4292511 1.0227474
#> 5 2.4021943 6.799788 -0.6692669 2.7446729
#> 6 4.2109032 17.858701 -0.3370763 1.3993853
Variation
set.seed(123)
data_large <- samplezoo("large")
head(data_large)
#> norm norm_2 norm_3 bern neg pois exp unif beta
#> 1 41.59287 83.70725 23.274065 0 1 6 6.628373 0.5468223 0.08294255
#> 2 46.54734 58.33188 35.588540 1 0 5 21.305366 0.3900809 0.63544684
#> 3 73.38062 69.26961 -2.070295 1 2 4 0.189645 0.7262119 0.11520674
#> 4 51.05763 54.31848 6.643849 0 2 2 8.479098 0.5101462 0.38184206
#> 5 51.93932 62.25090 18.040743 0 0 2 11.885521 0.2964126 0.17196046
#> 6 75.72597 71.31986 6.687576 0 1 4 6.363993 0.1442317 0.35908460
#> gamma chi_sq t_dist f_dist
#> 1 6.9893762 10.286282 -0.3814568 0.7264343
#> 2 5.4087626 6.519658 -2.3409216 0.9698166
#> 3 1.2587867 8.011417 -0.4744159 0.4329175
#> 4 0.9871787 14.780626 0.4292511 1.0227474
#> 5 2.4021943 6.799788 -0.6692669 2.7446729
#> 6 4.2109032 17.858701 -0.3370763 1.3993853
set.seed(456)
data_large <- samplezoo("large")
head(data_large)
#> norm norm_2 norm_3 bern neg pois exp unif beta
#> 1 29.84718 68.13494 7.9885694 0 0 5 3.4417303 0.8866347 0.05413307
#> 2 59.32663 52.32066 21.2526086 0 3 3 0.8114356 0.7976466 0.07195440
#> 3 62.01312 62.47569 38.4789563 0 2 6 46.8038907 0.6469920 0.22555129
#> 4 29.16661 53.51086 -0.8656269 0 1 5 11.6955326 0.2036753 0.71455809
#> 5 39.28465 47.19406 47.7819258 1 1 1 0.3535625 0.3653401 0.34619912
#> 6 45.13908 63.33566 53.3620528 1 1 2 4.5592136 0.7628573 0.25880522
#> gamma chi_sq t_dist f_dist
#> 1 6.7914120 4.464348 -1.0150596 2.2557295
#> 2 3.0132520 8.062120 0.3262369 1.4955877
#> 3 4.7360954 10.969593 1.5141157 1.0766901
#> 4 5.1235878 6.249247 0.6432708 1.1251542
#> 5 6.6851637 4.358815 0.2025742 0.4754946
#> 6 0.3903841 20.019575 1.6257109 0.6653886