or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

dataset-iteration.md, index.md, multi-input-output.md, sequence-processing.md, spark-integration.md

docs/sequence-processing.md

0

# Sequence Processing

1

2

This document covers the time series and sequential data processing capabilities of DeepLearning4j. The SequenceRecordReaderDataSetIterator handles variable-length sequences with configurable alignment modes, making it well suited to RNN, LSTM, and other sequence-based neural network architectures.

3

4

## Capabilities

5

6

### SequenceRecordReaderDataSetIterator

7

8

Main class for converting SequenceRecordReader data into DataSet objects for sequence-based neural network training.

9

10

```java { .api }

11

public class SequenceRecordReaderDataSetIterator implements DataSetIterator {

12

// Single reader constructors (features and labels from same reader)

13

public SequenceRecordReaderDataSetIterator(SequenceRecordReader reader,

14

int miniBatchSize,

15

int numPossibleLabels,

16

int labelIndex);

17

public SequenceRecordReaderDataSetIterator(SequenceRecordReader reader,

18

int miniBatchSize,

19

int numPossibleLabels,

20

int labelIndex,

21

boolean regression);

22

23

// Separate readers constructors (separate readers for features and labels)

24

public SequenceRecordReaderDataSetIterator(SequenceRecordReader featuresReader,

25

SequenceRecordReader labelsReader,

26

int miniBatchSize,

27

int numPossibleLabels);

28

public SequenceRecordReaderDataSetIterator(SequenceRecordReader featuresReader,

29

SequenceRecordReader labelsReader,

30

int miniBatchSize,

31

int numPossibleLabels,

32

boolean regression);

33

public SequenceRecordReaderDataSetIterator(SequenceRecordReader featuresReader,

34

SequenceRecordReader labelsReader,

35

int miniBatchSize,

36

int numPossibleLabels,

37

boolean regression,

38

AlignmentMode alignmentMode);

39

40

// Iterator methods

41

public boolean hasNext();

42

public DataSet next();

43

public DataSet next(int num);

44

public void remove();

45

46

// Configuration methods

47

public void setPreProcessor(DataSetPreProcessor preProcessor);

48

public DataSetPreProcessor getPreProcessor();

49

public void setCollectMetaData(boolean collectMetaData);

50

public boolean getCollectMetaData();

51

52

// Information methods

53

public int totalExamples();

54

public int inputColumns();

55

public int totalOutcomes();

56

public int batch();

57

public int cursor();

58

public int numExamples();

59

public List<String> getLabels();

60

61

// Reset and async support

62

public boolean resetSupported();

63

public boolean asyncSupported();

64

public void reset();

65

66

// Metadata support

67

public DataSet loadFromMetaData(RecordMetaData recordMetaData) throws IOException;

68

public DataSet loadFromMetaData(List<RecordMetaData> recordMetaDatas) throws IOException;

69

}

70

```

71

72

### AlignmentMode Enum

73

74

Defines how sequences of different lengths are aligned within a batch.

75

76

```java { .api }

77

public enum AlignmentMode {

78

EQUAL_LENGTH, // All sequences must be the same length (throws exception if not)

79

ALIGN_START, // Align sequences at the start, pad shorter sequences at the end

80

ALIGN_END // Align sequences at the end, pad shorter sequences at the start

81

}

82

```

83

84

## Constructor Parameters

85

86

### Single Reader Parameters

87

- **reader**: SequenceRecordReader containing both features and labels

88

- **miniBatchSize**: Number of sequences per batch

89

- **numPossibleLabels**: Number of possible label classes

90

- **labelIndex**: Column index containing the label within each time step

91

- **regression**: true for regression tasks, false for classification

92

93

### Separate Readers Parameters

94

- **featuresReader**: SequenceRecordReader for input features

95

- **labelsReader**: SequenceRecordReader for target labels

96

- **miniBatchSize**: Number of sequences per batch

97

- **numPossibleLabels**: Number of possible label classes

98

- **regression**: true for regression tasks, false for classification

99

- **alignmentMode**: How to handle sequences of different lengths

100

101

## Usage Examples

102

103

### Basic Time Series Classification

104

105

```java

106

import org.datavec.api.records.reader.SequenceRecordReader;

107

import org.datavec.api.records.reader.impl.csv.CSVSequenceRecordReader;

108

import org.datavec.api.split.NumberedFileInputSplit;

109

import org.deeplearning4j.datasets.datavec.SequenceRecordReaderDataSetIterator;

110

111

// Setup sequence readers

112

SequenceRecordReader featuresReader = new CSVSequenceRecordReader();

113

SequenceRecordReader labelsReader = new CSVSequenceRecordReader();

114

115

// Initialize with file splits (one file per sequence)

116

featuresReader.initialize(new NumberedFileInputSplit("features_%d.csv", 0, 999));

117

labelsReader.initialize(new NumberedFileInputSplit("labels_%d.csv", 0, 999));

118

119

// Create sequence iterator

120

SequenceRecordReaderDataSetIterator iterator =

121

new SequenceRecordReaderDataSetIterator(

122

featuresReader, // features reader

123

labelsReader, // labels reader

124

32, // miniBatchSize

125

10, // numPossibleLabels (10 classes)

126

false, // regression = false (classification)

127

AlignmentMode.ALIGN_START // alignment mode

128

);

129

130

// Use iterator for training

131

while (iterator.hasNext()) {

132

DataSet sequenceData = iterator.next();

133

// Features shape: [batchSize, numFeatures, maxSequenceLength]

134

// Labels shape: [batchSize, numLabels, maxSequenceLength]

135

System.out.println("Features shape: " + Arrays.toString(sequenceData.getFeatures().shape()));

136

}

137

```

138

139

### Single Reader Example

140

141

```java

142

// When features and labels are in the same sequence file

143

SequenceRecordReader reader = new CSVSequenceRecordReader();

144

reader.initialize(new NumberedFileInputSplit("data_%d.csv", 0, 99));

145

146

// Last column (index 5) contains labels

147

SequenceRecordReaderDataSetIterator iterator =

148

new SequenceRecordReaderDataSetIterator(

149

reader, // single reader

150

16, // miniBatchSize

151

3, // numPossibleLabels

152

5 // labelIndex (last column)

153

);

154

```

155

156

### Regression Example

157

158

```java

159

// Time series regression (e.g., stock price prediction)

160

SequenceRecordReaderDataSetIterator regressionIterator =

161

new SequenceRecordReaderDataSetIterator(

162

featuresReader, // features reader

163

labelsReader, // labels reader (continuous values)

164

64, // miniBatchSize

165

1, // numPossibleLabels (1 for regression)

166

true, // regression = true

167

AlignmentMode.EQUAL_LENGTH // all sequences must be same length

168

);

169

```

170

171

### Different Alignment Modes

172

173

```java

174

// ALIGN_START: Pad shorter sequences at the end with zeros

175

SequenceRecordReaderDataSetIterator alignStartIterator =

176

new SequenceRecordReaderDataSetIterator(

177

featuresReader, labelsReader, 32, 5, false,

178

AlignmentMode.ALIGN_START);

179

180

// ALIGN_END: Pad shorter sequences at the beginning with zeros

181

SequenceRecordReaderDataSetIterator alignEndIterator =

182

new SequenceRecordReaderDataSetIterator(

183

featuresReader, labelsReader, 32, 5, false,

184

AlignmentMode.ALIGN_END);

185

186

// EQUAL_LENGTH: Throw exception if sequences have different lengths

187

SequenceRecordReaderDataSetIterator equalLengthIterator =

188

new SequenceRecordReaderDataSetIterator(

189

featuresReader, labelsReader, 32, 5, false,

190

AlignmentMode.EQUAL_LENGTH);

191

```

192

193

### Sequence Preprocessing

194

195

```java

196

import org.nd4j.linalg.dataset.api.preprocessor.NormalizerMinMaxScaler;

197

198

// Create iterator

199

SequenceRecordReaderDataSetIterator iterator =

200

new SequenceRecordReaderDataSetIterator(featuresReader, labelsReader, 32, 5);

201

202

// Add sequence-aware preprocessing

203

NormalizerMinMaxScaler scaler = new NormalizerMinMaxScaler();

204

iterator.setPreProcessor(scaler);

205

206

// Fit normalizer on sequence data

207

scaler.fit(iterator);

208

iterator.reset();

209

210

// Use normalized sequence data

211

while (iterator.hasNext()) {

212

DataSet normalizedSequences = iterator.next();

213

// Every sequence is scaled with the same min/max statistics collected by scaler.fit(...)

214

}

215

```

216

217

## Sequence Data Format

218

219

### Input Data Shape

220

- **Features**: [batchSize, numFeatures, maxSequenceLength]

221

- **Labels**: [batchSize, numLabels, maxSequenceLength]

222

- **Feature Mask**: [batchSize, maxSequenceLength] (indicates valid time steps)

223

- **Label Mask**: [batchSize, maxSequenceLength] (indicates valid labels)

224

225

### Masking

226

The iterator automatically creates masks for variable-length sequences:

227

- Feature masks mark valid input time steps (1.0 = valid, 0.0 = padding)

228

- Label masks mark valid output time steps for training

229

- Masked time steps are ignored during training and evaluation

230

231

## Error Handling

232

233

### Common Exceptions

234

- **ZeroLengthSequenceException**: Thrown when a sequence has zero length

235

- **IllegalArgumentException**: Invalid constructor parameters

236

- **IOException**: File reading errors from SequenceRecordReader

237

238

### Validation

239

- miniBatchSize must be positive

240

- numPossibleLabels must be positive

241

- labelIndex must be a valid column index (0 <= labelIndex < number of columns per time step) in single-reader mode

242

- Sequence readers must be properly initialized

243

- EQUAL_LENGTH mode requires all sequences to have identical length