Modular Time-Series Forecasting for PyTorch
Build forecasting pipelines with a minimal PyTorch core, then opt into preprocessing, DARTS architecture search, MLTracker, and companion tooling only when you need them.
Up and running in seconds. Install from PyPI or clone from GitHub.
Recommended
pip install foreblocks
Requires Python ≥ 3.10. Add extras for preprocessing, DARTS, tracking, or benchmarking when needed.
Latest dev version
git clone https://github.com/lseman/foreblocks
cd foreblocks
pip install -e .
Start with the stable ForecastingModel + Trainer path, then branch into
preprocessing, transformer internals, or the staged DARTS workflow through dedicated guides and tutorials.
Everything you need for research-grade time-series work in one composable library.
One nn.Module class powers all strategies — seq2seq, autoregressive, direct, transformer_seq2seq — via a unified forward pass.
Mix and match LSTMEncoder, TransformerEncoder, Fourier, Wavelet, TCN, xLSTM, and Graph blocks as encoders or preprocessing heads.
RevinHead, DAINHead, DecompositionHead, PatchEmbedHead, Time2VecHead, FFTTopKHead — attach at input, output, or input_norm.
Built-in uncertainty intervals via Trainer: Split, EnbPI, Rolling ACI, AgACI, CPTC, AFOCP — enabled with one config flag.
HeadComposer learns differentiable weights over head candidates. Enable with train_nas=True in TrainingConfig for automatic architecture search.
AMP, gradient clipping, LR schedulers, early stopping, ModelEvaluator, and automatic SQLite experiment tracking via MLTracker.
Compose encoders, decoders, and heads into a ForecastingModel in just a few lines.
Compose encoder + decoder + train with Trainer
from foreblocks import ForecastingModel, Trainer, TrainingConfig
from foreblocks.blocks.enc_dec import LSTMEncoder, LSTMDecoder
from foreblocks.aux import create_dataloaders
enc = LSTMEncoder(input_size=3, hidden_size=64, num_layers=2)
dec = LSTMDecoder(input_size=1, hidden_size=64, output_size=1, num_layers=2)
model = ForecastingModel(
encoder=enc, decoder=dec,
forecasting_strategy="seq2seq", model_type="lstm",
target_len=24, output_size=1,
)
train_loader, val_loader = create_dataloaders(X_train, y_train, batch_size=32)
trainer = Trainer(model, config=TrainingConfig(num_epochs=50, patience=10))
history = trainer.train(train_loader, val_loader)
Multi-head self-attention for long-range context
from foreblocks import ForecastingModel, Trainer, TrainingConfig
from foreblocks.tf.transformer import TransformerEncoder, TransformerDecoder
enc = TransformerEncoder(input_size=4, hidden_size=128,
nheads=8, num_layers=3, dim_feedforward=512)
dec = TransformerDecoder(input_size=4, hidden_size=128, output_size=4,
nheads=8, num_layers=3, dim_feedforward=512)
model = ForecastingModel(
encoder=enc, decoder=dec,
forecasting_strategy="transformer_seq2seq", model_type="transformer",
target_len=96, output_size=4,
)
cfg = TrainingConfig(num_epochs=100, learning_rate=1e-4, use_amp=True)
trainer = Trainer(model, config=cfg)
Post-hoc composition and uncertainty quantification
# Attach preprocessing heads post-hoc
from foreblocks.core.heads.revin_head import RevinHead
from foreblocks.core.att import AttentionLayer
model.add_head(RevinHead(input_size=3), position="input_norm")
model.add_head(
AttentionLayer(method="dot",
encoder_hidden_size=64,
decoder_hidden_size=64),
position="attention"
)
print(model.list_heads())
# Conformal prediction intervals
from foreblocks import TrainingConfig, Trainer
cfg = TrainingConfig(
conformal_enabled=True,
conformal_method="enbpi", # split | rolling | agaci | cptc ...
conformal_quantile=0.9,
)
trainer = Trainer(model, config=cfg)
trainer.calibrate_conformal(cal_loader)
lower, upper = trainer.predict_with_intervals(X_test)
Focused view of the implementation choices behind ForeBlocks' transformer stack and MoE path.
Attention routing via att_type + attention_mode. Patch embedding (patch_encoder, patch_decoder) and CT-PatchTST mode for long context. Normalization strategies (norm_strategy, custom_norm) and SwiGLU feedforward.
from foreblocks import TransformerEncoder
enc = TransformerEncoder(
input_size=8,
d_model=256,
nhead=8,
num_layers=4,
att_type="standard",
attention_mode="hybrid",
norm_strategy="pre_norm",
custom_norm="rms",
patch_encoder=True,
patch_len=16,
patch_stride=8,
use_layer_skipping=True,
)
Enable mixture-of-experts with use_moe=True. Routing controls: routing_mode, router_type, top_k, temperature/noise. Auxiliary losses: load_balance_weight, z_loss_weight, and moe_aux_lambda. Shared experts (num_shared, shared_combine) for stable capacity scaling.
from foreblocks import TransformerEncoder
enc = TransformerEncoder(
input_size=8,
d_model=256,
nhead=8,
num_layers=4,
use_moe=True,
num_experts=8,
top_k=2,
routing_mode="token_choice",
router_type="noisy_topk",
load_balance_weight=1e-2,
z_loss_weight=1e-3,
moe_aux_lambda=1.0,
)
Load data, configure, train, predict — that's the entire workflow.
import numpy as np
from foreblocks import ForecastingModel, Trainer, TrainingConfig
from foreblocks.blocks.enc_dec import LSTMEncoder, LSTMDecoder
from foreblocks.aux import create_dataloaders
# 1. Build encoder / decoder
enc = LSTMEncoder(input_size=4, hidden_size=64, num_layers=2)
dec = LSTMDecoder(input_size=1, hidden_size=64, output_size=1, num_layers=2)
# 2. Assemble ForecastingModel
model = ForecastingModel(
encoder=enc, decoder=dec,
forecasting_strategy="seq2seq",
model_type="lstm",
target_len=24, output_size=1,
)
# 3. Prepare data
X = np.random.randn(1000, 30, 4).astype("float32") # [samples, seq_len, features]
y = np.random.randn(1000, 24, 1).astype("float32") # [samples, target_len, outputs]
train_loader, val_loader = create_dataloaders(X, y, batch_size=32, val_split=0.2)
# 4. Train
cfg = TrainingConfig(num_epochs=50, learning_rate=1e-3, patience=10, use_amp=True)
trainer = Trainer(model, config=cfg)
history = trainer.train(train_loader, val_loader)
# 5. Evaluate
metrics = trainer.metrics(val_loader) # returns dict of MAE, RMSE, MAPE ...
Pick encoder/decoder blocks and attach preprocessing heads at any position in ForecastingModel.
TrainingConfig covers AMP, gradient clipping, LR scheduler, NAS alphas, and conformal settings.
trainer.train(), trainer.metrics(), and trainer.plot_prediction() cover the full loop — with every run auto-logged to MLTracker.
Core classes and common configuration knobs at a glance.
ForecastingModel — main nn.Module
Trainer — training loop, conformal & NAS
TrainingConfig, ModelConfig
ModelEvaluator, TimeSeriesDataset
AttentionLayer, HeadComposer, HeadSpec
create_dataloaders(X, y, batch_size, val_split)
encoder, decoder, head — any nn.Module
forecasting_strategy
input_preprocessor, input_normalization
output_postprocessor, output_normalization
attention_module, head_composer
teacher_forcing_ratio, target_len, label_len
add_head(module, position)
remove_head(position), list_heads()
add_graph_block(block, where)
get_model_size(), benchmark_inference()
set_head_composer(composer), clear_head_composer()
get_aux_loss(), get_kl()
Practical snippets for the most common use cases.
Compose encoder/decoder, attach a normalization head and attention, then train.
from foreblocks import ForecastingModel, Trainer, TrainingConfig
from foreblocks.blocks.enc_dec import LSTMEncoder, LSTMDecoder
from foreblocks.core.att import AttentionLayer
from foreblocks.core.heads.revin_head import RevinHead
from foreblocks.aux import create_dataloaders
enc = LSTMEncoder(input_size=3, hidden_size=64, num_layers=2)
dec = LSTMDecoder(input_size=1, hidden_size=64, output_size=1, num_layers=2)
attn = AttentionLayer(method="dot", encoder_hidden_size=64, decoder_hidden_size=64)
model = ForecastingModel(
encoder=enc, decoder=dec,
forecasting_strategy="seq2seq", model_type="lstm",
target_len=24, output_size=1,
attention_module=attn,
)
model.add_head(RevinHead(input_size=3), position="input_norm")
train_loader, val_loader = create_dataloaders(X_train, y_train)
trainer = Trainer(model, config=TrainingConfig(num_epochs=80, patience=12))
history = trainer.train(train_loader, val_loader)
trainer.plot_prediction(val_loader)
Let the Trainer learn which preprocessing heads work best via differentiable architecture search.
from foreblocks import ForecastingModel, Trainer, TrainingConfig
from foreblocks.blocks.enc_dec import LSTMEncoder, LSTMDecoder
from foreblocks.core.heads.head_helper import HeadComposer, HeadSpec
from foreblocks.core.heads.revin_head import RevinHead
from foreblocks.core.heads.fft_topk_head import FFTTopKHead
from foreblocks.core.heads.differencing_head import DifferencingHead
composer = HeadComposer([
HeadSpec("revin", RevinHead(input_size=4)),
HeadSpec("fft_topk", FFTTopKHead(top_k=16)),
HeadSpec("diff", DifferencingHead()),
])
enc = LSTMEncoder(input_size=4, hidden_size=128, num_layers=2)
dec = LSTMDecoder(input_size=1, hidden_size=128, output_size=1, num_layers=2)
model = ForecastingModel(
encoder=enc, decoder=dec,
forecasting_strategy="seq2seq", model_type="lstm",
target_len=48, output_size=1,
head_composer=composer,
)
cfg = TrainingConfig(
num_epochs=100, learning_rate=1e-3,
train_nas=True, nas_warmup_epochs=10,
)
trainer = Trainer(model, config=cfg)
trainer.train(train_loader, val_loader)