{ "cells": [ { "cell_type": "markdown", "id": "142d241f-2ce8-424e-8828-9505488d2b39", "metadata": {}, "source": [ "# Forecasting time series with the NGBoost regressor (Python version)\n", "\n", "This notebook provides some examples of how the functions in the `ngboost_models.py` module can be used. The functions in this module apply the NGBoost regressor model. There are separate functions to train and evaluate a model (splitting the data into train and test datasets), to train a model with all the available data, and to make forecasts." ] }, { "cell_type": "code", "execution_count": 2, "id": "c2fe963b", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from epigraphhub.analysis.forecast_models.plots import * \n", "from epigraphhub.analysis.preprocessing import * \n", "from epigraphhub.analysis.forecast_models.ngboost_models import * \n" ] }, { "cell_type": "markdown", "id": "a9c88faf-38eb-4824-bf41-0e713f0722d4", "metadata": {}, "source": [ "In this tutorial, we will use the data saved in `./data/data_GE.csv`. This table contains the number of tests, cases, and hospitalizations (their daily values and their first- and second-order differences) for some cantons in Switzerland. " ] }, { "cell_type": "code", "execution_count": 3, "id": "23fd777a", "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", " | test_FR | \n", "diff_test_FR | \n", "diff_2_test_FR | \n", "test_NE | \n", "diff_test_NE | \n", "diff_2_test_NE | \n", "test_TI | \n", "diff_test_TI | \n", "diff_2_test_TI | \n", "test_VD | \n", "... | \n", "hosp_NE | \n", "diff_hosp_NE | \n", "diff_2_hosp_NE | \n", "hosp_FR | \n", "diff_hosp_FR | \n", "diff_2_hosp_FR | \n", "hosp_GE | \n", "diff_hosp_GE | \n", "diff_2_hosp_GE | \n", "vac_all | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
datum | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
2020-03-01 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.142857 | \n", "0.000000 | \n", "0.000000 | \n", "0.428571 | \n", "0.142857 | \n", "0.285714 | \n", "0.428571 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "
2020-03-02 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.285714 | \n", "0.142857 | \n", "0.142857 | \n", "0.857143 | \n", "0.428571 | \n", "0.571429 | \n", "0.428571 | \n", "0.000000 | \n", "0.142857 | \n", "0.0 | \n", "
2020-03-03 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.428571 | \n", "0.142857 | \n", "0.285714 | \n", "0.857143 | \n", "0.000000 | \n", "0.428571 | \n", "0.428571 | \n", "0.000000 | \n", "0.000000 | \n", "0.0 | \n", "
2020-03-04 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.285714 | \n", "-0.142857 | \n", "0.000000 | \n", "0.714286 | \n", "-0.142857 | \n", "-0.142857 | \n", "0.571429 | \n", "0.142857 | \n", "0.142857 | \n", "0.0 | \n", "
2020-03-05 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.428571 | \n", "0.142857 | \n", "0.000000 | \n", "1.000000 | \n", "0.285714 | \n", "0.142857 | \n", "0.857143 | \n", "0.285714 | \n", "0.428571 | \n", "0.0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
2022-08-26 | \n", "165.857143 | \n", "-0.714286 | \n", "-9.000000 | \n", "137.142857 | \n", "1.000000 | \n", "3.857143 | \n", "406.428571 | \n", "-7.714286 | \n", "-13.714286 | \n", "650.000000 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.142857 | \n", "-0.428571 | \n", "-0.428571 | \n", "2.571429 | \n", "-0.142857 | \n", "0.142857 | \n", "182.8 | \n", "
2022-08-27 | \n", "165.142857 | \n", "-0.714286 | \n", "-1.428571 | \n", "137.285714 | \n", "0.142857 | \n", "1.142857 | \n", "405.285714 | \n", "-1.142857 | \n", "-8.857143 | \n", "650.000000 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.142857 | \n", "0.000000 | \n", "-0.428571 | \n", "2.571429 | \n", "0.000000 | \n", "-0.142857 | \n", "182.8 | \n", "
2022-08-28 | \n", "159.000000 | \n", "-6.142857 | \n", "-6.857143 | \n", "135.428571 | \n", "-1.857143 | \n", "-1.714286 | \n", "407.571429 | \n", "2.285714 | \n", "1.142857 | \n", "645.142857 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.142857 | \n", "0.000000 | \n", "0.000000 | \n", "2.142857 | \n", "-0.428571 | \n", "-0.428571 | \n", "182.8 | \n", "
2022-08-29 | \n", "151.857143 | \n", "-7.142857 | \n", "-13.285714 | \n", "130.142857 | \n", "-5.285714 | \n", "-7.142857 | \n", "360.714286 | \n", "-46.857143 | \n", "-44.571429 | \n", "615.714286 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.142857 | \n", "0.000000 | \n", "0.000000 | \n", "1.857143 | \n", "-0.285714 | \n", "-0.714286 | \n", "182.8 | \n", "
2022-08-30 | \n", "123.857143 | \n", "-6.714286 | \n", "-28.142857 | \n", "107.142857 | \n", "-6.285714 | \n", "-22.857143 | \n", "293.571429 | \n", "-41.714286 | \n", "-82.285714 | \n", "505.714286 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.142857 | \n", "0.000000 | \n", "0.000000 | \n", "1.428571 | \n", "-0.428571 | \n", "-0.714286 | \n", "182.8 | \n", "
913 rows × 64 columns
\n", "\n", " | target | \n", "lower | \n", "median | \n", "upper | \n", "train_size | \n", "
---|---|---|---|---|---|
date | \n", "\n", " | \n", " | \n", " | \n", " | \n", " |
2020-05-02 | \n", "0.285714 | \n", "0.171583 | \n", "0.300695 | \n", "0.526959 | \n", "584 | \n", "
2020-05-03 | \n", "0.285714 | \n", "0.188583 | \n", "0.363243 | \n", "0.699668 | \n", "584 | \n", "
2020-05-04 | \n", "0.142857 | \n", "0.077129 | \n", "0.199622 | \n", "0.516653 | \n", "584 | \n", "
2020-05-05 | \n", "0.142857 | \n", "0.074032 | \n", "0.222506 | \n", "0.668752 | \n", "584 | \n", "
2020-05-06 | \n", "0.142857 | \n", "0.083643 | \n", "0.205801 | \n", "0.506366 | \n", "584 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
2022-04-26 | \n", "7.714286 | \n", "1.810433 | \n", "3.324867 | \n", "6.106128 | \n", "584 | \n", "
2022-04-27 | \n", "6.857143 | \n", "2.234695 | \n", "3.160372 | \n", "4.469492 | \n", "584 | \n", "
2022-04-28 | \n", "5.714286 | \n", "2.208807 | \n", "3.287218 | \n", "4.892146 | \n", "584 | \n", "
2022-04-29 | \n", "5.000000 | \n", "1.947474 | \n", "3.324971 | \n", "5.676806 | \n", "584 | \n", "
2022-04-30 | \n", "5.714286 | \n", "3.261146 | \n", "5.104967 | \n", "7.991267 | \n", "584 | \n", "
729 rows × 5 columns
\n", "\n", " | lower | \n", "median | \n", "upper | \n", "
---|---|---|---|
date | \n", "\n", " | \n", " | \n", " |
2022-05-01 | \n", "3.659357 | \n", "4.477327 | \n", "5.478137 | \n", "
2022-05-02 | \n", "3.678122 | \n", "4.535233 | \n", "5.592075 | \n", "
2022-05-03 | \n", "3.674208 | \n", "4.571385 | \n", "5.687637 | \n", "
2022-05-04 | \n", "3.513954 | \n", "4.615518 | \n", "6.062404 | \n", "
2022-05-05 | \n", "3.800476 | \n", "4.725615 | \n", "5.875958 | \n", "