// SPDX-FileCopyrightText: © 2025 Nøkken.io <nokken.io@proton.me>
// SPDX-License-Identifier: AGPL-3.0
//
// time_series.cpp
// Implementation of time series analysis functions
//
#include "health_analytics_engine.h"

#include <algorithm>
#include <cmath>
#include <cstring>
#include <vector>

#include "utils.h"

/**
|
|
* @brief Detect trends in time series data
|
|
*
|
|
* @param values Time series data
|
|
* @param length Number of elements in the array
|
|
* @param strength Output parameter for trend strength
|
|
* @return TrendType Enum indicating trend direction and type
|
|
*/
|
|
TrendType detect_trend(const double* values, int length, double* strength) {
|
|
if (length < 3) {
|
|
*strength = 0;
|
|
return TREND_NONE;
|
|
}
|
|
|
|
// Generate time vector (0, 1, 2, ...)
|
|
std::vector<double> time(length);
|
|
for (int i = 0; i < length; i++) {
|
|
time[i] = i;
|
|
}
|
|
|
|
// Calculate linear regression
|
|
double slope, intercept, r_squared;
|
|
if (!calculateLinearRegression(time.data(), values, length, slope, intercept, r_squared)) {
|
|
*strength = 0;
|
|
return TREND_NONE;
|
|
}
|
|
|
|
// Detrend the data for further analysis
|
|
std::vector<double> detrended(length);
|
|
for (int i = 0; i < length; i++) {
|
|
detrended[i] = values[i] - (intercept + slope * i);
|
|
}
|
|
|
|
// Check for cyclical patterns using autocorrelation
|
|
bool has_cycle = false;
|
|
int cycle_length = 0;
|
|
double max_autocorr = 0;
|
|
|
|
// Check autocorrelation for various lags
|
|
const int MIN_LAG = 2;
|
|
const int MAX_LAG = length / 3; // Look for cycles up to 1/3 of series length
|
|
|
|
for (int lag = MIN_LAG; lag < MAX_LAG; lag++) {
|
|
double autocorr = calculateAutocorrelation(detrended.data(), length, lag);
|
|
|
|
// If strong positive autocorrelation found
|
|
if (autocorr > 0.3 && autocorr > max_autocorr) {
|
|
max_autocorr = autocorr;
|
|
cycle_length = lag;
|
|
has_cycle = true;
|
|
}
|
|
}
|
|
|
|
// Check if cycle pattern is stronger than linear trend
|
|
if (has_cycle && max_autocorr > std::abs(r_squared)) {
|
|
*strength = max_autocorr;
|
|
return TREND_CYCLIC;
|
|
}
|
|
|
|
// Determine trend direction based on slope and strength
|
|
*strength = std::abs(r_squared);
|
|
|
|
// Require minimum strength to declare a trend
|
|
if (*strength < 0.2) {
|
|
return TREND_NONE;
|
|
} else if (slope > 0) {
|
|
return TREND_INCREASING;
|
|
} else {
|
|
return TREND_DECREASING;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* @brief Predict future values of a time series using ARIMA-like approach
|
|
*
|
|
* @param timeSeries Time series data
|
|
* @param dataLength Length of time series
|
|
* @param stepsAhead Number of future steps to predict
|
|
* @param factorName Name of the factor being predicted
|
|
* @return TimeSeriesForecast Structure containing predictions and confidence intervals
|
|
*/
|
|
TimeSeriesForecast predict_time_series(const double* timeSeries,
|
|
int dataLength,
|
|
int stepsAhead,
|
|
const char* factorName) {
|
|
TimeSeriesForecast forecast;
|
|
memset(&forecast, 0, sizeof(TimeSeriesForecast));
|
|
|
|
if (dataLength < 5 || stepsAhead <= 0) {
|
|
forecast.overallConfidence = 0;
|
|
return forecast;
|
|
}
|
|
|
|
// Copy factor name
|
|
strncpy(forecast.factorName, factorName, MAX_STRING_SIZE - 1);
|
|
forecast.factorName[MAX_STRING_SIZE - 1] = '\0';
|
|
|
|
// First, check for seasonality
|
|
int potentialSeasonality = 0;
|
|
double maxAutocorr = 0;
|
|
|
|
// Look for seasonality in range 2 to dataLength/3
|
|
for (int lag = 2; lag <= dataLength/3; lag++) {
|
|
double acf = calculateAutocorrelation(timeSeries, dataLength, lag);
|
|
if (acf > 0.3 && acf > maxAutocorr) {
|
|
maxAutocorr = acf;
|
|
potentialSeasonality = lag;
|
|
}
|
|
}
|
|
|
|
// Set seasonality period if detected
|
|
forecast.seasonalityPeriod = potentialSeasonality;
|
|
|
|
// Decompose time series if seasonality detected
|
|
std::vector<double> trend(dataLength);
|
|
std::vector<double> seasonal(dataLength);
|
|
std::vector<double> residual(dataLength);
|
|
|
|
bool hasSeasonality = potentialSeasonality > 0 && maxAutocorr > 0.3;
|
|
|
|
if (hasSeasonality) {
|
|
// Decompose the time series
|
|
decomposeTimeSeries(timeSeries, dataLength, potentialSeasonality,
|
|
trend.data(), seasonal.data(), residual.data());
|
|
} else {
|
|
// No seasonality, just use simple moving average for trend
|
|
calculateMovingAverage(timeSeries, dataLength, std::min(7, dataLength/3), trend.data());
|
|
|
|
// No seasonal component
|
|
for (int i = 0; i < dataLength; i++) {
|
|
seasonal[i] = 0;
|
|
residual[i] = timeSeries[i] - trend[i];
|
|
}
|
|
}
|
|
|
|
// Fit AR model to residuals for short-term dynamics
|
|
// Determine optimal AR order using PACF
|
|
int maxLag = std::min(10, dataLength/5);
|
|
std::vector<double> pacf(maxLag + 1);
|
|
calculatePACF(residual.data(), dataLength, maxLag, pacf.data());
|
|
|
|
// Find significant AR terms (PACF > 0.2)
|
|
std::vector<int> significantLags;
|
|
for (int i = 1; i <= maxLag; i++) {
|
|
if (std::abs(pacf[i]) > 0.2) {
|
|
significantLags.push_back(i);
|
|
}
|
|
}
|
|
|
|
// Limit to 3 most significant terms
|
|
if (significantLags.size() > 3) {
|
|
std::sort(significantLags.begin(), significantLags.end(),
|
|
[&pacf](int a, int b) {
|
|
return std::abs(pacf[a]) > std::abs(pacf[b]);
|
|
});
|
|
significantLags.resize(3);
|
|
}
|
|
|
|
// Fit AR coefficients using linear regression
|
|
int arOrder = significantLags.size();
|
|
std::vector<double> arCoefficients(arOrder, 0);
|
|
|
|
if (arOrder > 0) {
|
|
// Prepare training data for AR model
|
|
int trainingSize = dataLength - significantLags.back();
|
|
std::vector<std::vector<double>> X(trainingSize, std::vector<double>(arOrder));
|
|
std::vector<double> y(trainingSize);
|
|
|
|
for (int i = 0; i < trainingSize; i++) {
|
|
int t = i + significantLags.back();
|
|
y[i] = residual[t];
|
|
|
|
for (int j = 0; j < arOrder; j++) {
|
|
X[i][j] = residual[t - significantLags[j]];
|
|
}
|
|
}
|
|
|
|
// Very simplified AR coefficient estimation
|
|
// Real implementation would use matrix operations
|
|
for (int j = 0; j < arOrder; j++) {
|
|
double sumXY = 0, sumX2 = 0;
|
|
for (int i = 0; i < trainingSize; i++) {
|
|
sumXY += X[i][j] * y[i];
|
|
sumX2 += X[i][j] * X[i][j];
|
|
}
|
|
if (sumX2 > 0) {
|
|
arCoefficients[j] = sumXY / sumX2;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Set time unit (days by default)
|
|
forecast.timeUnit = TIME_UNIT_DAYS;
|
|
|
|
// Generate forecasts
|
|
double trendGrowth = 0;
|
|
if (dataLength > 10) {
|
|
// Calculate average trend growth over last 10 points
|
|
trendGrowth = (trend[dataLength-1] - trend[dataLength-11]) / 10.0;
|
|
}
|
|
|
|
// Last observed values
|
|
std::vector<double> lastResiduals(dataLength);
|
|
for (int i = 0; i < dataLength; i++) {
|
|
lastResiduals[i] = residual[i];
|
|
}
|
|
|
|
// Generate predictions
|
|
for (int i = 0; i < stepsAhead && i < 30; i++) {
|
|
int t = dataLength + i;
|
|
|
|
// Forecast trend component
|
|
double trendForecast = trend[dataLength-1] + trendGrowth * (i + 1);
|
|
|
|
// Forecast seasonal component (if any)
|
|
double seasonalForecast = 0;
|
|
if (hasSeasonality && potentialSeasonality > 0) {
|
|
seasonalForecast = seasonal[dataLength - potentialSeasonality + (i % potentialSeasonality)];
|
|
}
|
|
|
|
// Forecast residual component using AR model
|
|
double residualForecast = 0;
|
|
for (int j = 0; j < arOrder; j++) {
|
|
int lag = significantLags[j];
|
|
if (i >= lag) {
|
|
// Use previously forecasted residuals
|
|
residualForecast += arCoefficients[j] * lastResiduals[dataLength + i - lag];
|
|
} else {
|
|
// Use observed residuals
|
|
residualForecast += arCoefficients[j] * residual[dataLength - lag + i];
|
|
}
|
|
}
|
|
|
|
// Store forecasted residual
|
|
lastResiduals.push_back(residualForecast);
|
|
|
|
// Combine components for final forecast
|
|
forecast.predictions[i] = trendForecast + seasonalForecast + residualForecast;
|
|
|
|
// Calculate confidence intervals (widen with forecast horizon)
|
|
double stdError = 0;
|
|
for (int j = 0; j < dataLength; j++) {
|
|
stdError += residual[j] * residual[j];
|
|
}
|
|
stdError = sqrt(stdError / dataLength);
|
|
|
|
// Wider intervals for longer forecasts
|
|
double multiplier = 1.96 * sqrt(1.0 + 0.25 * i); // Roughly 95% CI with growing uncertainty
|
|
|
|
forecast.confidenceIntervals[i][0] = forecast.predictions[i] - multiplier * stdError;
|
|
forecast.confidenceIntervals[i][1] = forecast.predictions[i] + multiplier * stdError;
|
|
}
|
|
|
|
// Set overall confidence based on model quality and forecast distance
|
|
double modelAccuracy = 0.8; // Would be calculated from validation in real model
|
|
if (hasSeasonality) modelAccuracy += 0.1;
|
|
if (arOrder > 0) modelAccuracy += 0.1 * std::min(arOrder, 2);
|
|
|
|
forecast.overallConfidence = modelAccuracy * exp(-0.05 * stepsAhead);
|
|
if (forecast.overallConfidence > 0.95) forecast.overallConfidence = 0.95;
|
|
if (forecast.overallConfidence < 0.2) forecast.overallConfidence = 0.2;
|
|
|
|
return forecast;
|
|
} |