// SPDX-FileCopyrightText: © 2025 Nøkken.io // SPDX-License-Identifier: AGPL-3.0 // // time_series.cpp // Implementation of time series analysis functions // #include "health_analytics_engine.h" #include "utils.h" /** * @brief Detect trends in time series data * * @param values Time series data * @param length Number of elements in the array * @param strength Output parameter for trend strength * @return TrendType Enum indicating trend direction and type */ TrendType detect_trend(const double* values, int length, double* strength) { if (length < 3) { *strength = 0; return TREND_NONE; } // Generate time vector (0, 1, 2, ...) std::vector time(length); for (int i = 0; i < length; i++) { time[i] = i; } // Calculate linear regression double slope, intercept, r_squared; if (!calculateLinearRegression(time.data(), values, length, slope, intercept, r_squared)) { *strength = 0; return TREND_NONE; } // Detrend the data for further analysis std::vector detrended(length); for (int i = 0; i < length; i++) { detrended[i] = values[i] - (intercept + slope * i); } // Check for cyclical patterns using autocorrelation bool has_cycle = false; int cycle_length = 0; double max_autocorr = 0; // Check autocorrelation for various lags const int MIN_LAG = 2; const int MAX_LAG = length / 3; // Look for cycles up to 1/3 of series length for (int lag = MIN_LAG; lag < MAX_LAG; lag++) { double autocorr = calculateAutocorrelation(detrended.data(), length, lag); // If strong positive autocorrelation found if (autocorr > 0.3 && autocorr > max_autocorr) { max_autocorr = autocorr; cycle_length = lag; has_cycle = true; } } // Check if cycle pattern is stronger than linear trend if (has_cycle && max_autocorr > std::abs(r_squared)) { *strength = max_autocorr; return TREND_CYCLIC; } // Determine trend direction based on slope and strength *strength = std::abs(r_squared); // Require minimum strength to declare a trend if (*strength < 0.2) { return TREND_NONE; } else if (slope > 0) { return TREND_INCREASING; } else { return TREND_DECREASING; } } /** * @brief Predict future values of a time series using ARIMA-like approach * * @param timeSeries Time series data * @param dataLength Length of time series * @param stepsAhead Number of future steps to predict * @param factorName Name of the factor being predicted * @return TimeSeriesForecast Structure containing predictions and confidence intervals */ TimeSeriesForecast predict_time_series(const double* timeSeries, int dataLength, int stepsAhead, const char* factorName) { TimeSeriesForecast forecast; memset(&forecast, 0, sizeof(TimeSeriesForecast)); if (dataLength < 5 || stepsAhead <= 0) { forecast.overallConfidence = 0; return forecast; } // Copy factor name strncpy(forecast.factorName, factorName, MAX_STRING_SIZE - 1); forecast.factorName[MAX_STRING_SIZE - 1] = '\0'; // First, check for seasonality int potentialSeasonality = 0; double maxAutocorr = 0; // Look for seasonality in range 2 to dataLength/3 for (int lag = 2; lag <= dataLength/3; lag++) { double acf = calculateAutocorrelation(timeSeries, dataLength, lag); if (acf > 0.3 && acf > maxAutocorr) { maxAutocorr = acf; potentialSeasonality = lag; } } // Set seasonality period if detected forecast.seasonalityPeriod = potentialSeasonality; // Decompose time series if seasonality detected std::vector trend(dataLength); std::vector seasonal(dataLength); std::vector residual(dataLength); bool hasSeasonality = potentialSeasonality > 0 && maxAutocorr > 0.3; if (hasSeasonality) { // Decompose the time series decomposeTimeSeries(timeSeries, dataLength, potentialSeasonality, trend.data(), seasonal.data(), residual.data()); } else { // No seasonality, just use simple moving average for trend calculateMovingAverage(timeSeries, dataLength, std::min(7, dataLength/3), trend.data()); // No seasonal component for (int i = 0; i < dataLength; i++) { seasonal[i] = 0; residual[i] = timeSeries[i] - trend[i]; } } // Fit AR model to residuals for short-term dynamics // Determine optimal AR order using PACF int maxLag = std::min(10, dataLength/5); std::vector pacf(maxLag + 1); calculatePACF(residual.data(), dataLength, maxLag, pacf.data()); // Find significant AR terms (PACF > 0.2) std::vector significantLags; for (int i = 1; i <= maxLag; i++) { if (std::abs(pacf[i]) > 0.2) { significantLags.push_back(i); } } // Limit to 3 most significant terms if (significantLags.size() > 3) { std::sort(significantLags.begin(), significantLags.end(), [&pacf](int a, int b) { return std::abs(pacf[a]) > std::abs(pacf[b]); }); significantLags.resize(3); } // Fit AR coefficients using linear regression int arOrder = significantLags.size(); std::vector arCoefficients(arOrder, 0); if (arOrder > 0) { // Prepare training data for AR model int trainingSize = dataLength - significantLags.back(); std::vector> X(trainingSize, std::vector(arOrder)); std::vector y(trainingSize); for (int i = 0; i < trainingSize; i++) { int t = i + significantLags.back(); y[i] = residual[t]; for (int j = 0; j < arOrder; j++) { X[i][j] = residual[t - significantLags[j]]; } } // Very simplified AR coefficient estimation // Real implementation would use matrix operations for (int j = 0; j < arOrder; j++) { double sumXY = 0, sumX2 = 0; for (int i = 0; i < trainingSize; i++) { sumXY += X[i][j] * y[i]; sumX2 += X[i][j] * X[i][j]; } if (sumX2 > 0) { arCoefficients[j] = sumXY / sumX2; } } } // Set time unit (days by default) forecast.timeUnit = TIME_UNIT_DAYS; // Generate forecasts double trendGrowth = 0; if (dataLength > 10) { // Calculate average trend growth over last 10 points trendGrowth = (trend[dataLength-1] - trend[dataLength-11]) / 10.0; } // Last observed values std::vector lastResiduals(dataLength); for (int i = 0; i < dataLength; i++) { lastResiduals[i] = residual[i]; } // Generate predictions for (int i = 0; i < stepsAhead && i < 30; i++) { int t = dataLength + i; // Forecast trend component double trendForecast = trend[dataLength-1] + trendGrowth * (i + 1); // Forecast seasonal component (if any) double seasonalForecast = 0; if (hasSeasonality && potentialSeasonality > 0) { seasonalForecast = seasonal[dataLength - potentialSeasonality + (i % potentialSeasonality)]; } // Forecast residual component using AR model double residualForecast = 0; for (int j = 0; j < arOrder; j++) { int lag = significantLags[j]; if (i >= lag) { // Use previously forecasted residuals residualForecast += arCoefficients[j] * lastResiduals[dataLength + i - lag]; } else { // Use observed residuals residualForecast += arCoefficients[j] * residual[dataLength - lag + i]; } } // Store forecasted residual lastResiduals.push_back(residualForecast); // Combine components for final forecast forecast.predictions[i] = trendForecast + seasonalForecast + residualForecast; // Calculate confidence intervals (widen with forecast horizon) double stdError = 0; for (int j = 0; j < dataLength; j++) { stdError += residual[j] * residual[j]; } stdError = sqrt(stdError / dataLength); // Wider intervals for longer forecasts double multiplier = 1.96 * sqrt(1.0 + 0.25 * i); // Roughly 95% CI with growing uncertainty forecast.confidenceIntervals[i][0] = forecast.predictions[i] - multiplier * stdError; forecast.confidenceIntervals[i][1] = forecast.predictions[i] + multiplier * stdError; } // Set overall confidence based on model quality and forecast distance double modelAccuracy = 0.8; // Would be calculated from validation in real model if (hasSeasonality) modelAccuracy += 0.1; if (arOrder > 0) modelAccuracy += 0.1 * std::min(arOrder, 2); forecast.overallConfidence = modelAccuracy * exp(-0.05 * stepsAhead); if (forecast.overallConfidence > 0.95) forecast.overallConfidence = 0.95; if (forecast.overallConfidence < 0.2) forecast.overallConfidence = 0.2; return forecast; }