DataVec integration library providing data loading, transformation, and Spark processing capabilities for DeepLearning4j
—
Time series and sequential data processing capabilities for DeepLearning4j. The SequenceRecordReaderDataSetIterator handles variable-length sequences with configurable alignment modes, making it ideal for RNN, LSTM, and other sequence-based neural network architectures.
Main class for converting SequenceRecordReader data into DataSet objects for sequence-based neural network training.
public class SequenceRecordReaderDataSetIterator implements DataSetIterator {
// Single reader constructors (features and labels from same reader)
// labelIndex: the column holding the label at each time step; the remaining
// columns are presumably treated as features — TODO confirm against implementation
public SequenceRecordReaderDataSetIterator(SequenceRecordReader reader,
int miniBatchSize,
int numPossibleLabels,
int labelIndex);
public SequenceRecordReaderDataSetIterator(SequenceRecordReader reader,
int miniBatchSize,
int numPossibleLabels,
int labelIndex,
boolean regression);
// Separate readers constructors (separate readers for features and labels)
// NOTE(review): the 4-arg and 5-arg separate-reader constructors take no
// AlignmentMode; presumably a default alignment is applied — verify
public SequenceRecordReaderDataSetIterator(SequenceRecordReader featuresReader,
SequenceRecordReader labelsReader,
int miniBatchSize,
int numPossibleLabels);
public SequenceRecordReaderDataSetIterator(SequenceRecordReader featuresReader,
SequenceRecordReader labelsReader,
int miniBatchSize,
int numPossibleLabels,
boolean regression);
public SequenceRecordReaderDataSetIterator(SequenceRecordReader featuresReader,
SequenceRecordReader labelsReader,
int miniBatchSize,
int numPossibleLabels,
boolean regression,
AlignmentMode alignmentMode);
// Iterator methods
public boolean hasNext();
public DataSet next();
// next(num): presumably returns a batch of up to num examples — TODO confirm
public DataSet next(int num);
public void remove();
// Configuration methods
public void setPreProcessor(DataSetPreProcessor preProcessor);
public DataSetPreProcessor getPreProcessor();
public void setCollectMetaData(boolean collectMetaData);
public boolean getCollectMetaData();
// Information methods
public int totalExamples();
public int inputColumns();
public int totalOutcomes();
public int batch();
public int cursor();
public int numExamples();
public List<String> getLabels();
// Reset and async support
public boolean resetSupported();
public boolean asyncSupported();
public void reset();
// Metadata support
// NOTE(review): loadFromMetaData presumably requires setCollectMetaData(true)
// so RecordMetaData is tracked during iteration — verify before relying on it
public DataSet loadFromMetaData(RecordMetaData recordMetaData) throws IOException;
public DataSet loadFromMetaData(List<RecordMetaData> recordMetaDatas) throws IOException;
}

Defines how sequences of different lengths are aligned within a batch.
// Alignment strategy applied when batching sequences of unequal length.
public enum AlignmentMode {
EQUAL_LENGTH, // All sequences must be the same length (throws exception if not)
ALIGN_START, // Align sequences at the start, pad shorter sequences at the end
ALIGN_END // Align sequences at the end, pad shorter sequences at the start
}

import org.datavec.api.records.reader.SequenceRecordReader;
import org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader;
import org.datavec.api.split.NumberedFileInputSplit;
import org.deeplearning4j.datasets.datavec.SequenceRecordReaderDataSetIterator;
import java.util.Arrays; // required for Arrays.toString below
// Setup sequence readers
SequenceRecordReader featuresReader = new CSVSequenceRecordReader();
SequenceRecordReader labelsReader = new CSVSequenceRecordReader();
// Initialize with file splits (one file per sequence)
featuresReader.initialize(new NumberedFileInputSplit("features_%d.csv", 0, 999));
labelsReader.initialize(new NumberedFileInputSplit("labels_%d.csv", 0, 999));
// Create sequence iterator
// NOTE(review): AlignmentMode is a nested enum — qualify it as
// SequenceRecordReaderDataSetIterator.AlignmentMode or import it statically
SequenceRecordReaderDataSetIterator iterator =
new SequenceRecordReaderDataSetIterator(
featuresReader, // features reader
labelsReader, // labels reader
32, // miniBatchSize
10, // numPossibleLabels (10 classes)
false, // regression = false (classification)
AlignmentMode.ALIGN_START // alignment mode
);
// Use iterator for training
while (iterator.hasNext()) {
DataSet sequenceData = iterator.next();
// Features shape: [batchSize, numFeatures, maxSequenceLength]
// Labels shape: [batchSize, numLabels, maxSequenceLength]
System.out.println("Features shape: " + Arrays.toString(sequenceData.getFeatures().shape()));
}

// When features and labels are in the same sequence file
// Single-reader mode: labelIndex selects the label column at each time step;
// the remaining columns are presumably used as features — TODO confirm
SequenceRecordReader reader = new CSVSequenceRecordReader();
reader.initialize(new NumberedFileInputSplit("data_%d.csv", 0, 99));
// Last column (index 5) contains labels
SequenceRecordReaderDataSetIterator iterator =
new SequenceRecordReaderDataSetIterator(
reader, // single reader
16, // miniBatchSize
3, // numPossibleLabels
5 // labelIndex (last column)
);

// Time series regression (e.g., stock price prediction)
SequenceRecordReaderDataSetIterator regressionIterator =
new SequenceRecordReaderDataSetIterator(
featuresReader, // features reader
labelsReader, // labels reader (continuous values)
64, // miniBatchSize
1, // numPossibleLabels (1 for regression; presumably ignored when regression=true — verify)
true, // regression = true (labels used as continuous targets, not one-hot encoded)
AlignmentMode.EQUAL_LENGTH // all sequences must be same length
);

// ALIGN_START: Pad shorter sequences at the end with zeros
// NOTE(review): padded positions are presumably excluded from loss/evaluation
// via the DataSet feature/label mask arrays — verify against implementation
SequenceRecordReaderDataSetIterator alignStartIterator =
new SequenceRecordReaderDataSetIterator(
featuresReader, labelsReader, 32, 5, false,
AlignmentMode.ALIGN_START);
// ALIGN_END: Pad shorter sequences at the beginning with zeros
SequenceRecordReaderDataSetIterator alignEndIterator =
new SequenceRecordReaderDataSetIterator(
featuresReader, labelsReader, 32, 5, false,
AlignmentMode.ALIGN_END);
// EQUAL_LENGTH: Throw exception if sequences have different lengths
SequenceRecordReaderDataSetIterator equalLengthIterator =
new SequenceRecordReaderDataSetIterator(
featuresReader, labelsReader, 32, 5, false,
AlignmentMode.EQUAL_LENGTH);

import org.nd4j.linalg.dataset.api.preprocessor.NormalizerMinMaxScaler;
// Create iterator
SequenceRecordReaderDataSetIterator iterator =
new SequenceRecordReaderDataSetIterator(featuresReader, labelsReader, 32, 5);
// Collect min/max statistics over the RAW data first
NormalizerMinMaxScaler scaler = new NormalizerMinMaxScaler();
scaler.fit(iterator);
iterator.reset();
// Attach the fitted normalizer only AFTER fitting: once setPreProcessor is
// called, next() applies the scaler to every batch, so attaching it before
// fit() would feed data through an unfitted scaler while statistics are
// being collected
iterator.setPreProcessor(scaler);
// Use normalized sequence data
while (iterator.hasNext()) {
DataSet normalizedSequences = iterator.next();
// Normalization applies global per-feature min/max statistics to every
// time step (not independent per-sequence statistics)
}

The iterator automatically creates masks for variable-length sequences:
Install with Tessl CLI
npx tessl i tessl/maven-org-datavec--datavec-local