Python package for manipulating 2-dimensional tabular data structures, with an emphasis on speed and big-data support.
—
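Set operations (union, intersection, difference, and symmetric difference) for combining and comparing data frames. Note: the upstream datatable documentation describes these functions as treating each input frame as a set of values and expecting each frame to have a single column (or be empty); verify the behavior for multi-column frames against your installed version.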
def union(*frames) -> Frame:
"""
Union of data frames (all unique rows from all frames).
Parameters:
- *frames: Frame objects to combine
Returns:
Frame with all unique rows from input frames
"""
def intersect(*frames) -> Frame:
"""
Intersection of data frames (rows common to all frames).
Parameters:
- *frames: Frame objects to intersect
Returns:
Frame with rows present in all input frames
"""
def setdiff(frame1, frame2) -> Frame:
"""
Set difference (rows in frame1 but not in frame2).
Parameters:
- frame1: First Frame
- frame2: Second Frame
Returns:
Frame with rows in frame1 that are not in frame2
"""
def symdiff(frame1, frame2) -> Frame:
"""
Symmetric difference (rows in either frame but not both).
Parameters:
- frame1: First Frame
- frame2: Second Frame
Returns:
Frame with rows in either frame but not in both
"""

import datatable as dt
# Create sample frames
A = dt.Frame({'x': [1, 2, 3], 'y': ['a', 'b', 'c']})
B = dt.Frame({'x': [2, 3, 4], 'y': ['b', 'c', 'd']})
C = dt.Frame({'x': [3, 4, 5], 'y': ['c', 'd', 'e']})
# Union - all unique rows
union_AB = dt.union(A, B)
union_ABC = dt.union(A, B, C)
# Intersection - common rows
intersect_AB = dt.intersect(A, B)
intersect_ABC = dt.intersect(A, B, C)
# Set difference
diff_AB = dt.setdiff(A, B) # Rows in A but not B
diff_BA = dt.setdiff(B, A) # Rows in B but not A
# Symmetric difference
symdiff_AB = dt.symdiff(A, B) # Rows in A or B but not both

Install with Tessl CLI
npx tessl i tessl/pypi-datatable