or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

geographic-data.md index.md input-format.md tweet-entities.md tweet-model.md user-model.md

docs/tweet-entities.md

0

# Tweet Entities

1

2

Entities extracted from tweet text, including hashtags, URLs, user mentions, media attachments, and financial symbols.

3

4

## Capabilities

5

6

### Entities Class

7

8

Container for all entities parsed from tweet text, providing structured access to hashtags, URLs, mentions, media, and symbols.

9

10

```java { .api }

11

/**

12

* Container for all entities parsed from tweet text.

13

* Automatically extracts and categorizes different types of content from tweets.

14

*/

15

public class Entities {

16

17

/**

18

* Default constructor (initializes all entity lists)

19

*/

20

public Entities();

21

22

/**

23

* Gets the list of hashtags extracted from tweet text

24

* @return List of HashTags objects

25

*/

26

public List<HashTags> getHashtags();

27

28

/**

29

* Sets the list of hashtags

30

* @param hashtags - List of HashTags objects

31

*/

32

public void setHashtags(List<HashTags> hashtags);

33

34

/**

35

* Gets the list of URLs found in tweet text

36

* @return List of URL objects

37

*/

38

public List<URL> getUrls();

39

40

/**

41

* Sets the list of URLs

42

* @param urls - List of URL objects

43

*/

44

public void setUrls(List<URL> urls);

45

46

/**

47

* Gets the list of user mentions in tweet text

48

* @return List of UserMention objects

49

*/

50

public List<UserMention> getUser_mentions();

51

52

/**

53

* Sets the list of user mentions

54

* @param user_mentions - List of UserMention objects

55

*/

56

public void setUser_mentions(List<UserMention> user_mentions);

57

58

/**

59

* Gets the list of media attachments

60

* @return List of Media objects

61

*/

62

public List<Media> getMedia();

63

64

/**

65

* Sets the list of media attachments

66

* @param media - List of Media objects

67

*/

68

public void setMedia(List<Media> media);

69

70

/**

71

* Gets the list of financial symbols (cashtags)

72

* @return List of Symbol objects

73

*/

74

public List<Symbol> getSymbols();

75

76

/**

77

* Sets the list of financial symbols

78

* @param symbols - List of Symbol objects

79

*/

80

public void setSymbols(List<Symbol> symbols);

81

}

82

```

83

84

### HashTags Class

85

86

Hashtags extracted from tweet text with position information.

87

88

```java { .api }

89

/**

90

* Hashtag entities parsed from tweet text.

91

* Includes text content and position indices within the tweet.

92

*/

93

public class HashTags {

94

95

/**

96

* Gets the hashtag text (without the # symbol)

97

* @return Hashtag text string

98

*/

99

public String getText();

100

101

/**

102

* Sets the hashtag text with optional hash symbol handling

103

* @param text - Hashtag text

104

* @param hashExist - Whether text already includes # symbol

105

*/

106

public void setText(String text, boolean hashExist);

107

108

/**

109

* Gets the character position indices where hashtag appears in tweet text

110

* @return Array of [start, end] positions

111

*/

112

public long[] getIndices();

113

114

/**

115

* Sets the character position indices

116

* @param indices - Array of [start, end] positions

117

*/

118

public void setIndices(long[] indices);

119

120

/**

121

* Sets the position indices using start and end values

122

* @param start - Starting character position

123

* @param end - Ending character position

124

*/

125

public void setIndices(long start, long end);

126

}

127

```

128

129

### URL Class

130

131

URLs found in tweet text with expanded and display versions.

132

133

```java { .api }

134

/**

135

* URL entities included in tweet text.

136

* Includes original, display, and expanded versions of URLs.

137

*/

138

public class URL {

139

140

/**

141

* Default constructor

142

*/

143

public URL();

144

145

/**

146

* Gets the original URL as it appears in tweet text (usually shortened)

147

* @return Original URL string

148

*/

149

public String getUrl();

150

151

/**

152

* Sets the original URL

153

* @param url - Original URL string

154

*/

155

public void setUrl(String url);

156

157

/**

158

* Gets the display URL shown to users (truncated if long)

159

* @return Display URL string

160

*/

161

public String getDisplay_url();

162

163

/**

164

* Sets the display URL

165

* @param display_url - Display URL string

166

*/

167

public void setDisplay_url(String display_url);

168

169

/**

170

* Gets the fully expanded URL

171

* @return Expanded URL string

172

*/

173

public String getExpanded_url();

174

175

/**

176

* Sets the fully expanded URL

177

* @param expanded_url - Expanded URL string

178

*/

179

public void setExpanded_url(String expanded_url);

180

181

/**

182

* Gets the character position indices where URL appears in tweet text

183

* @return Array of [start, end] positions

184

*/

185

public long[] getIndices();

186

187

/**

188

* Sets the character position indices

189

* @param indices - Array of [start, end] positions

190

*/

191

public void setIndices(long[] indices);

192

}

193

```

194

195

### UserMention Class

196

197

User mentions (@username) found in tweet text with user details.

198

199

```java { .api }

200

/**

201

* User mention entities representing @username references in tweet text.

202

* Includes user ID, screen name, and display name information.

203

*/

204

public class UserMention {

205

206

/**

207

* Default constructor

208

*/

209

public UserMention();

210

211

/**

212

* Gets the mentioned user's unique ID

213

* @return User ID as long

214

*/

215

public long getId();

216

217

/**

218

* Sets the mentioned user's ID

219

* @param id - User ID

220

*/

221

public void setId(long id);

222

223

/**

224

* Gets the mentioned user's ID as string

225

* @return User ID string (computed from long ID)

226

*/

227

public String getId_str();

228

229

/**

230

* Sets the user ID string (internal method)

231

*/

232

public void setId_str();

233

234

/**

235

* Gets the mentioned user's screen name (without @ symbol)

236

* @return Screen name string

237

*/

238

public String getScreen_name();

239

240

/**

241

* Sets the mentioned user's screen name

242

* @param screen_name - Screen name string

243

*/

244

public void setScreen_name(String screen_name);

245

246

/**

247

* Gets the mentioned user's display name

248

* @return Display name string

249

*/

250

public String getName();

251

252

/**

253

* Sets the mentioned user's display name

254

* @param name - Display name string

255

*/

256

public void setName(String name);

257

258

/**

259

* Gets the character position indices where mention appears in tweet text

260

* @return Array of [start, end] positions

261

*/

262

public long[] getIndices();

263

264

/**

265

* Sets the character position indices

266

* @param indices - Array of [start, end] positions

267

*/

268

public void setIndices(long[] indices);

269

}

270

```

271

272

### Media Class

273

274

Media attachments (photos, videos) uploaded with the tweet.

275

276

```java { .api }

277

/**

278

* Media entities representing photos, videos, and other media uploaded with tweets.

279

* Includes URLs, metadata, and available size variants.

280

*/

281

public class Media {

282

283

/**

284

* Default constructor

285

*/

286

public Media();

287

288

/**

289

* Gets the unique media ID

290

* @return Media ID as long

291

*/

292

public long getId();

293

294

/**

295

* Sets the unique media ID

296

* @param id - Media ID

297

*/

298

public void setId(long id);

299

300

/**

301

* Gets the media ID as string

302

* @return Media ID string

303

*/

304

public String getId_str();

305

306

/**

307

* Sets the media ID as string

308

* @param id_str - Media ID string

309

*/

310

public void setId_str(String id_str);

311

312

/**

313

* Gets the media URL (HTTP)

314

* @return Media URL string

315

*/

316

public String getMedia_url();

317

318

/**

319

* Sets the media URL

320

* @param media_url - Media URL string

321

*/

322

public void setMedia_url(String media_url);

323

324

/**

325

* Gets the media URL (HTTPS)

326

* @return HTTPS media URL string

327

*/

328

public String getMedia_url_https();

329

330

/**

331

* Sets the HTTPS media URL

332

* @param media_url_https - HTTPS media URL string

333

*/

334

public void setMedia_url_https(String media_url_https);

335

336

/**

337

* Gets the display URL shown in tweet text

338

* @return Display URL string

339

*/

340

public String getDisplay_url();

341

342

/**

343

* Sets the display URL

344

* @param display_url - Display URL string

345

*/

346

public void setDisplay_url(String display_url);

347

348

/**

349

* Gets the expanded URL linking to media

350

* @return Expanded URL string

351

*/

352

public String getExpanded_url();

353

354

/**

355

* Sets the expanded URL

356

* @param expanded_url - Expanded URL string

357

*/

358

public void setExpanded_url(String expanded_url);

359

360

/**

361

* Gets the shortened URL as it appears in tweet text

362

* @return Shortened URL string

363

*/

364

public String getUrl();

365

366

/**

367

* Sets the shortened URL

368

* @param url - Shortened URL string

369

*/

370

public void setUrl(String url);

371

372

/**

373

* Gets the media type (e.g., "photo", "video")

374

* @return Media type string

375

*/

376

public String getType();

377

378

/**

379

* Sets the media type

380

* @param type - Media type string

381

*/

382

public void setType(String type);

383

384

/**

385

* Gets the available size variants for the media

386

* @return Map of size names to Size objects

387

*/

388

public Map<String, Size> getSizes();

389

390

/**

391

* Sets the available size variants

392

* @param sizes - Map of size names to Size objects

393

*/

394

public void setSizes(Map<String, Size> sizes);

395

396

/**

397

* Gets the character position indices where media URL appears in tweet text

398

* @return Array of [start, end] positions

399

*/

400

public long[] getIndices();

401

402

/**

403

* Sets the character position indices

404

* @param indices - Array of [start, end] positions

405

*/

406

public void setIndices(long[] indices);

407

}

408

```

409

410

### Size Class

411

412

Size variants available for media files.

413

414

```java { .api }

415

/**

416

* Size information for media file variants.

417

* Different sizes are available for different display contexts.

418

*/

419

public class Size {

420

421

/**

422

* Constructor with size dimensions and resize method

423

* @param width - Width in pixels

424

* @param height - Height in pixels

425

* @param resize - Resize method ("fit", "crop")

426

*/

427

public Size(long width, long height, String resize);

428

429

/**

430

* Gets the width in pixels

431

* @return Width as long

432

*/

433

public long getWidth();

434

435

/**

436

* Sets the width in pixels

437

* @param width - Width value

438

*/

439

public void setWidth(long width);

440

441

/**

442

* Gets the height in pixels

443

* @return Height as long

444

*/

445

public long getHeight();

446

447

/**

448

* Sets the height in pixels

449

* @param height - Height value

450

*/

451

public void setHeight(long height);

452

453

/**

454

* Gets the resize method used for this size variant

455

* @return Resize method ("fit", "crop")

456

*/

457

public String getResize();

458

459

/**

460

* Sets the resize method

461

* @param resize - Resize method string

462

*/

463

public void setResize(String resize);

464

}

465

```

466

467

### Symbol Class

468

469

Financial symbols (cashtags), which start with a dollar sign in tweet text.

470

471

```java { .api }

472

/**

473

* Financial symbol entities (cashtags) starting with dollar sign.

474

* Used for stock tickers and financial references.

475

*/

476

public class Symbol {

477

478

/**

479

* Default constructor

480

*/

481

public Symbol();

482

483

/**

484

* Gets the symbol text (without the $ symbol)

485

* @return Symbol text string

486

*/

487

public String getText();

488

489

/**

490

* Sets the symbol text

491

* @param text - Symbol text string

492

*/

493

public void setText(String text);

494

495

/**

496

* Gets the character position indices where symbol appears in tweet text

497

* @return Array of [start, end] positions

498

*/

499

public long[] getIndices();

500

501

/**

502

* Sets the character position indices

503

* @param indices - Array of [start, end] positions

504

*/

505

public void setIndices(long[] indices);

506

}

507

```

508

509

**Usage Examples:**

510

511

```java

512

import org.apache.flink.contrib.tweetinputformat.model.tweet.Tweet;

513

import org.apache.flink.contrib.tweetinputformat.model.tweet.entities.*;

514

515

// Process tweet entities

516

DataSet<Tweet> tweets = env.readFile(new SimpleTweetInputFormat(), "tweets.json");

517

518

// Extract all hashtags

519

DataSet<String> hashtags = tweets

520

.flatMap(tweet -> tweet.getEntities().getHashtags())

521

.map(hashtag -> hashtag.getText());

522

523

// Find tweets with specific hashtags

524

DataSet<Tweet> techTweets = tweets.filter(tweet -> {

525

List<HashTags> hashtags = tweet.getEntities().getHashtags();

526

return hashtags.stream().anyMatch(ht ->

527

ht.getText().toLowerCase().contains("tech") ||

528

ht.getText().toLowerCase().contains("ai")

529

);

530

});

531

532

// Extract URLs and their expanded versions

533

DataSet<Tuple2<String, String>> urlMappings = tweets

534

.flatMap(tweet -> tweet.getEntities().getUrls())

535

.map(url -> new Tuple2<>(url.getUrl(), url.getExpanded_url()));

536

537

// Find tweets with media attachments

538

DataSet<Tweet> mediaTweets = tweets.filter(tweet ->

539

tweet.getEntities().getMedia().size() > 0

540

);

541

```

542

543

```java

544

// Detailed entity analysis

545

Tweet tweet = new Tweet();

546

// Tweet populated by input format...

547

548

Entities entities = tweet.getEntities();

549

550

// Process hashtags

551

System.out.println("Hashtags found:");

552

for (HashTags hashtag : entities.getHashtags()) {

553

System.out.println(" #" + hashtag.getText());

554

long[] indices = hashtag.getIndices();

555

System.out.println(" Position: " + indices[0] + "-" + indices[1]);

556

}

557

558

// Process user mentions

559

System.out.println("User mentions:");

560

for (UserMention mention : entities.getUser_mentions()) {

561

System.out.println(" @" + mention.getScreen_name());

562

System.out.println(" User: " + mention.getName());

563

System.out.println(" ID: " + mention.getId());

564

}

565

566

// Process URLs

567

System.out.println("URLs found:");

568

for (URL url : entities.getUrls()) {

569

System.out.println(" Original: " + url.getUrl());

570

System.out.println(" Display: " + url.getDisplay_url());

571

System.out.println(" Expanded: " + url.getExpanded_url());

572

}

573

574

// Process media

575

System.out.println("Media attachments:");

576

for (Media media : entities.getMedia()) {

577

System.out.println(" Type: " + media.getType());

578

System.out.println(" URL: " + media.getMedia_url_https());

579

580

// Check available sizes

581

Map<String, Size> sizes = media.getSizes();

582

for (Map.Entry<String, Size> entry : sizes.entrySet()) {

583

Size size = entry.getValue();

584

System.out.println(" " + entry.getKey() + ": " +

585

size.getWidth() + "x" + size.getHeight() + " (" + size.getResize() + ")");

586

}

587

}

588

589

// Process financial symbols

590

System.out.println("Financial symbols:");

591

for (Symbol symbol : entities.getSymbols()) {

592

System.out.println(" $" + symbol.getText());

593

}

594

```

595

596

```java

597

// Entity-based analytics

598

DataSet<Tweet> tweets = env.readFile(new SimpleTweetInputFormat(), "tweets.json");

599

600

// Most popular hashtags

601

DataSet<Tuple2<String, Long>> hashtagCounts = tweets

602

.flatMap(tweet -> tweet.getEntities().getHashtags())

603

.map(hashtag -> new Tuple2<>(hashtag.getText(), 1L))

604

.groupBy(0)

605

.sum(1)

606

.sortPartition(1, Order.DESCENDING);

607

608

// Domain analysis from URLs

609

DataSet<Tuple2<String, Long>> domainCounts = tweets

610

.flatMap(tweet -> tweet.getEntities().getUrls())

611

.map(url -> {

612

try {

613

java.net.URL parsedUrl = new java.net.URL(url.getExpanded_url());

614

return new Tuple2<>(parsedUrl.getHost(), 1L);

615

} catch (Exception e) {

616

return new Tuple2<>("unknown", 1L);

617

}

618

})

619

.groupBy(0)

620

.sum(1);

621

622

// User interaction network from mentions

623

DataSet<Tuple2<String, String>> mentionNetwork = tweets

624

.flatMap(tweet -> {

625

String author = tweet.getUser().getScreen_name();

626

return tweet.getEntities().getUser_mentions().stream()

627

.map(mention -> new Tuple2<>(author, mention.getScreen_name()))

628

.collect(Collectors.toList());

629

});

630

```

631

632

**Key Features:**

633

634

- **Automatic Extraction**: Entities are automatically parsed from tweet text during JSON processing

635

- **Position Tracking**: All entities include character position indices within the tweet text

636

- **URL Expansion**: URLs include original, display, and fully expanded versions

637

- **Media Variants**: Media includes multiple size variants for different display contexts

638

- **User Information**: Mentions include full user details (ID, screen name, display name)

639

- **Financial Symbols**: Support for cashtag extraction ($AAPL, $TSLA, etc.)

640

- **Comprehensive Coverage**: Models the entity types of Twitter's REST API v1.1 (hashtags, URLs, user mentions, media, and symbols)