nokken/native/statistics/time_series.cpp
2025-04-20 11:17:03 -04:00

272 lines
No EOL
9.5 KiB
C++

// SPDX-FileCopyrightText: © 2025 Nøkken.io <nokken.io@proton.me>
// SPDX-License-Identifier: AGPL-3.0
//
// time_series.cpp
// Implementation of time series analysis functions
//
#include "health_analytics_engine.h"

#include <algorithm>
#include <cmath>
#include <cstring>
#include <vector>

#include "utils.h"
/**
 * @brief Classify the dominant pattern (none, rising, falling, cyclic) of a series
 *
 * A straight line is fitted to the samples against their index. The residuals of
 * that fit are then scanned for positive autocorrelation at lags 2 up to (but not
 * including) length / 3. The series is reported as cyclic when the strongest
 * autocorrelation above 0.3 also exceeds |r_squared| of the linear fit; otherwise
 * the direction of the fitted slope is reported, provided |r_squared| is at least 0.2.
 *
 * @param values Time series data
 * @param length Number of elements in values
 * @param strength Output: 0 when there are fewer than 3 samples or the regression
 *                 fails, the winning autocorrelation for a cyclic result, and
 *                 |r_squared| of the linear fit in every other case
 * @return TrendType TREND_NONE, TREND_INCREASING, TREND_DECREASING or TREND_CYCLIC
 */
TrendType detect_trend(const double* values, int length, double* strength) {
    // Too few samples to fit anything meaningful.
    if (length < 3) {
        *strength = 0;
        return TREND_NONE;
    }

    // Regress the samples against their own position: 0, 1, 2, ...
    std::vector<double> sampleIndex(length);
    for (int k = 0; k < length; ++k) {
        sampleIndex[k] = k;
    }

    double slope = 0.0;
    double intercept = 0.0;
    double r_squared = 0.0;
    const bool fitted = calculateLinearRegression(sampleIndex.data(), values, length,
                                                  slope, intercept, r_squared);
    if (!fitted) {
        *strength = 0;
        return TREND_NONE;
    }

    // Subtract the fitted line so only the oscillating part remains.
    std::vector<double> residuals(length);
    for (int k = 0; k < length; ++k) {
        residuals[k] = values[k] - (intercept + slope * k);
    }

    // Scan candidate periods and remember the strongest one above the 0.3 threshold.
    const int firstLag = 2;
    const int lagLimit = length / 3;  // exclusive upper bound
    int bestLag = 0;                  // stays 0 while no qualifying lag has been seen
    double bestAutocorr = 0;
    for (int lag = firstLag; lag < lagLimit; ++lag) {
        const double candidate = calculateAutocorrelation(residuals.data(), length, lag);
        if (candidate > 0.3) {
            if (candidate > bestAutocorr) {
                bestAutocorr = candidate;
                bestLag = lag;
            }
        }
    }

    const double fitStrength = std::abs(r_squared);

    // A cycle wins only when it explains the data better than the straight line.
    const bool cycleFound = bestLag != 0;
    if (cycleFound && bestAutocorr > fitStrength) {
        *strength = bestAutocorr;
        return TREND_CYCLIC;
    }

    // Otherwise report the linear fit; weak fits are not called a trend.
    *strength = fitStrength;
    if (fitStrength < 0.2) {
        return TREND_NONE;
    }
    return slope > 0 ? TREND_INCREASING : TREND_DECREASING;
}
/**
 * @brief Predict future values of a time series using an ARIMA-like approach
 *
 * The series is split into trend, seasonal and residual components. A season
 * length is chosen as the lag in [2, dataLength / 3] with the strongest
 * autocorrelation above 0.3; without one, the trend is a moving average and the
 * seasonal component is zero. Up to three residual lags whose |PACF| exceeds 0.2
 * get an independently estimated autoregressive coefficient. Each forecast step is
 * the sum of the extrapolated trend, the repeated seasonal pattern and the
 * autoregressive residual forecast, with an interval that widens with the horizon.
 *
 * @param timeSeries Time series data; the forecast continues from its last element
 * @param dataLength Length of time series
 * @param stepsAhead Number of future steps to predict; at most 30 steps are written
 * @param factorName Name of the factor being predicted, truncated to
 *                   MAX_STRING_SIZE - 1 characters; a null pointer leaves the name empty
 * @return TimeSeriesForecast Structure containing predictions and confidence
 *         intervals. It is returned zero-filled with overallConfidence 0 when
 *         timeSeries is null, dataLength < 5 or stepsAhead <= 0.
 */
TimeSeriesForecast predict_time_series(const double* timeSeries,
                                       int dataLength,
                                       int stepsAhead,
                                       const char* factorName) {
    // NOTE(review): 30 is presumably the capacity of forecast.predictions and
    // forecast.confidenceIntervals - confirm against the struct declaration.
    const int kMaxForecastSteps = 30;

    TimeSeriesForecast forecast;
    memset(&forecast, 0, sizeof(TimeSeriesForecast));

    if (timeSeries == nullptr || dataLength < 5 || stepsAhead <= 0) {
        forecast.overallConfidence = 0;
        return forecast;
    }

    // Copy factor name; a null name keeps the zero-filled (empty) string.
    if (factorName != nullptr) {
        strncpy(forecast.factorName, factorName, MAX_STRING_SIZE - 1);
        forecast.factorName[MAX_STRING_SIZE - 1] = '\0';
    }

    // First, check for seasonality: strongest autocorrelation above 0.3
    // for lags in the range 2 to dataLength/3.
    int potentialSeasonality = 0;
    double maxAutocorr = 0;
    for (int lag = 2; lag <= dataLength / 3; lag++) {
        const double acf = calculateAutocorrelation(timeSeries, dataLength, lag);
        if (acf > 0.3 && acf > maxAutocorr) {
            maxAutocorr = acf;
            potentialSeasonality = lag;
        }
    }

    // Set seasonality period if detected (0 means none).
    forecast.seasonalityPeriod = potentialSeasonality;

    // A lag is only recorded together with an autocorrelation above 0.3,
    // so a non-zero period is sufficient evidence of seasonality.
    const bool hasSeasonality = potentialSeasonality > 0;

    std::vector<double> trend(dataLength);
    std::vector<double> seasonal(dataLength);
    std::vector<double> residual(dataLength);

    if (hasSeasonality) {
        // Decompose the time series
        decomposeTimeSeries(timeSeries, dataLength, potentialSeasonality,
                            trend.data(), seasonal.data(), residual.data());
    } else {
        // No seasonality, just use simple moving average for trend
        calculateMovingAverage(timeSeries, dataLength, std::min(7, dataLength / 3), trend.data());
        // No seasonal component
        for (int i = 0; i < dataLength; i++) {
            seasonal[i] = 0;
            residual[i] = timeSeries[i] - trend[i];
        }
    }

    // Fit AR model to residuals for short-term dynamics.
    // Determine candidate AR terms using PACF.
    const int maxLag = std::min(10, dataLength / 5);
    std::vector<double> pacf(maxLag + 1);
    calculatePACF(residual.data(), dataLength, maxLag, pacf.data());

    // Find significant AR terms (|PACF| > 0.2)
    std::vector<int> significantLags;
    for (int i = 1; i <= maxLag; i++) {
        if (std::abs(pacf[i]) > 0.2) {
            significantLags.push_back(i);
        }
    }

    // Limit to 3 most significant terms
    if (significantLags.size() > 3) {
        std::sort(significantLags.begin(), significantLags.end(),
                  [&pacf](int a, int b) {
                      return std::abs(pacf[a]) > std::abs(pacf[b]);
                  });
        significantLags.resize(3);
    }

    // Very simplified AR coefficient estimation: each lag gets its own
    // one-variable least-squares slope through the origin.
    // Real implementation would use matrix operations.
    const int arOrder = static_cast<int>(significantLags.size());
    std::vector<double> arCoefficients(arOrder, 0.0);
    if (arOrder > 0) {
        // After the sort above the lags are ordered by |PACF|, not by size, so the
        // last entry is not necessarily the largest lag. Training must start at the
        // true maximum, otherwise residual[t - lag] is read before the array start.
        const int largestLag = *std::max_element(significantLags.begin(), significantLags.end());

        for (int j = 0; j < arOrder; j++) {
            const int lag = significantLags[j];
            double sumXY = 0;
            double sumX2 = 0;
            for (int t = largestLag; t < dataLength; t++) {
                const double lagged = residual[t - lag];
                sumXY += lagged * residual[t];
                sumX2 += lagged * lagged;
            }
            if (sumX2 > 0) {
                arCoefficients[j] = sumXY / sumX2;
            }
        }
    }

    // Set time unit (days by default)
    forecast.timeUnit = TIME_UNIT_DAYS;

    // Average trend growth per step over the last 10 points (0 for short series)
    double trendGrowth = 0;
    if (dataLength > 10) {
        trendGrowth = (trend[dataLength - 1] - trend[dataLength - 11]) / 10.0;
    }

    // Root-mean-square residual; it does not depend on the forecast step,
    // so it is computed once up front.
    double stdError = 0;
    for (int j = 0; j < dataLength; j++) {
        stdError += residual[j] * residual[j];
    }
    stdError = sqrt(stdError / dataLength);

    // Observed residuals followed by forecasted ones, so index dataLength + i - lag
    // resolves to an observed value or an earlier forecast as appropriate.
    const int horizon = std::min(stepsAhead, kMaxForecastSteps);
    std::vector<double> residualHistory(residual);
    residualHistory.reserve(residualHistory.size() + horizon);

    // Generate predictions
    for (int i = 0; i < horizon; i++) {
        // Forecast trend component
        const double trendForecast = trend[dataLength - 1] + trendGrowth * (i + 1);

        // Forecast seasonal component (if any) by repeating the last full season
        double seasonalForecast = 0;
        if (hasSeasonality) {
            seasonalForecast = seasonal[dataLength - potentialSeasonality + (i % potentialSeasonality)];
        }

        // Forecast residual component using AR model
        double residualForecast = 0;
        for (int j = 0; j < arOrder; j++) {
            const int lag = significantLags[j];
            residualForecast += arCoefficients[j] * residualHistory[dataLength + i - lag];
        }
        // Store forecasted residual for use by later steps
        residualHistory.push_back(residualForecast);

        // Combine components for final forecast
        forecast.predictions[i] = trendForecast + seasonalForecast + residualForecast;

        // Roughly 95% CI with uncertainty growing with the forecast horizon
        const double multiplier = 1.96 * sqrt(1.0 + 0.25 * i);
        forecast.confidenceIntervals[i][0] = forecast.predictions[i] - multiplier * stdError;
        forecast.confidenceIntervals[i][1] = forecast.predictions[i] + multiplier * stdError;
    }

    // Set overall confidence based on model quality and forecast distance
    double modelAccuracy = 0.8;  // Would be calculated from validation in real model
    if (hasSeasonality) modelAccuracy += 0.1;
    if (arOrder > 0) modelAccuracy += 0.1 * std::min(arOrder, 2);
    forecast.overallConfidence = modelAccuracy * exp(-0.05 * stepsAhead);
    if (forecast.overallConfidence > 0.95) forecast.overallConfidence = 0.95;
    if (forecast.overallConfidence < 0.2) forecast.overallConfidence = 0.2;
    return forecast;
}