SortedBucketScioContext

final class SortedBucketScioContext extends Serializable

Source: SortMergeBucketScioContextSyntax.scala

Linear Supertypes

Serializable, AnyRef, Any

Ordering

Alphabetic
By Inheritance

Inherited

SortedBucketScioContext
Serializable
AnyRef
Any

Hide All
Show All

Visibility

Public
Protected

Instance Constructors

new SortedBucketScioContext(self: ScioContext)

Deprecated Type Members

type SortMergeTransformReadBuilder[KeyType, K1, K2, R] = ReadBuilder[KeyType, K1, K2, R]
Annotations
@deprecated
Deprecated
(Since version 0.14.0) Use SortMergeTransform.ReadBuilder instead
type SortMergeTransformWithSideInputsWriteBuilder[KeyType, R, W] = WithSideInputsWriteBuilder[KeyType, R, W]
Annotations
@deprecated
Deprecated
(Since version 0.14.0) Use SortMergeTransform.WithSideInputsWriteBuilder instead
type SortMergeTransformWriteBuilder[KeyType, R, W] = WriteBuilder[KeyType, R, W]
Annotations
@deprecated
Deprecated
(Since version 0.14.0) Use SortMergeTransform.WriteBuilder instead

Value Members

final def !=(arg0: Any): Boolean
Definition Classes
AnyRef → Any
final def ##: Int
Definition Classes
AnyRef → Any
final def ==(arg0: Any): Boolean
Definition Classes
AnyRef → Any
final def asInstanceOf[T0]: T0
Definition Classes
Any
def clone(): AnyRef
Attributes
protected[lang]
Definition Classes
AnyRef
Annotations
@throws(classOf[java.lang.CloneNotSupportedException]) @native()
final def eq(arg0: AnyRef): Boolean
Definition Classes
AnyRef
def equals(arg0: AnyRef): Boolean
Definition Classes
AnyRef → Any
def finalize(): Unit
Attributes
protected[lang]
Definition Classes
AnyRef
Annotations
@throws(classOf[java.lang.Throwable])
final def getClass(): Class[_ <: AnyRef]
Definition Classes
AnyRef → Any
Annotations
@native()
def hashCode(): Int
Definition Classes
AnyRef → Any
Annotations
@native()
final def isInstanceOf[T0]: Boolean
Definition Classes
Any
final def ne(arg0: AnyRef): Boolean
Definition Classes
AnyRef
final def notify(): Unit
Definition Classes
AnyRef
Annotations
@native()
final def notifyAll(): Unit
Definition Classes
AnyRef
Annotations
@native()
def sortMergeCoGroup[K1, K2, A, B, C, D](keyClass: Class[K1], keyClassSecondary: Class[K2], a: Read[A], b: Read[B], c: Read[C], d: Read[D])(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[A], arg3: Coder[B], arg4: Coder[C], arg5: Coder[D]): SCollection[((K1, K2), (Iterable[A], Iterable[B], Iterable[C], Iterable[D]))]
targetParallelism defaults to TargetParallelism.auto()
targetParallelism defaults to TargetParallelism.auto()
Annotations
@experimental()
def sortMergeCoGroup[K1, K2, A, B, C, D](keyClass: Class[K1], keyClassSecondary: Class[K2], a: Read[A], b: Read[B], c: Read[C], d: Read[D], targetParallelism: TargetParallelism)(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[A], arg3: Coder[B], arg4: Coder[C], arg5: Coder[D]): SCollection[((K1, K2), (Iterable[A], Iterable[B], Iterable[C], Iterable[D]))]
Secondary keyed variant
Secondary keyed variant
Annotations
@experimental()
def sortMergeCoGroup[K, A, B, C, D](keyClass: Class[K], a: Read[A], b: Read[B], c: Read[C], d: Read[D])(implicit arg0: Coder[K], arg1: Coder[A], arg2: Coder[B], arg3: Coder[C], arg4: Coder[D]): SCollection[(K, (Iterable[A], Iterable[B], Iterable[C], Iterable[D]))]
targetParallelism defaults to TargetParallelism.auto()
targetParallelism defaults to TargetParallelism.auto()
Annotations
@experimental()
def sortMergeCoGroup[K, A, B, C, D](keyClass: Class[K], a: Read[A], b: Read[B], c: Read[C], d: Read[D], targetParallelism: TargetParallelism)(implicit arg0: Coder[K], arg1: Coder[A], arg2: Coder[B], arg3: Coder[C], arg4: Coder[D]): SCollection[(K, (Iterable[A], Iterable[B], Iterable[C], Iterable[D]))]
For each key K in a or b or c or d, return a resulting SCollection that contains a tuple with the list of values for that key in a, b, c and d.
For each key K in a or b or c or d, return a resulting SCollection that contains a tuple with the list of values for that key in a, b, c and d.
See note on SortedBucketScioContext.sortMergeJoin for information on how an SMB cogroup differs from a regular org.apache.beam.sdk.transforms.join.CoGroupByKey operation.
keyClass
cogroup key class. Must have a Coder in Beam's default org.apache.beam.sdk.coders.CoderRegistry as custom key coders are not supported yet.
targetParallelism
the desired parallelism of the job. See org.apache.beam.sdk.extensions.smb.TargetParallelism for more information.
Annotations
@experimental()
def sortMergeCoGroup[K1, K2, A, B, C](keyClass: Class[K1], keyClassSecondary: Class[K2], a: Read[A], b: Read[B], c: Read[C])(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[A], arg3: Coder[B], arg4: Coder[C]): SCollection[((K1, K2), (Iterable[A], Iterable[B], Iterable[C]))]
targetParallelism defaults to TargetParallelism.auto()
targetParallelism defaults to TargetParallelism.auto()
Annotations
@experimental()
def sortMergeCoGroup[K1, K2, A, B, C](keyClass: Class[K1], keyClassSecondary: Class[K2], a: Read[A], b: Read[B], c: Read[C], targetParallelism: TargetParallelism)(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[A], arg3: Coder[B], arg4: Coder[C]): SCollection[((K1, K2), (Iterable[A], Iterable[B], Iterable[C]))]
Secondary keyed variant
Secondary keyed variant
Annotations
@experimental()
def sortMergeCoGroup[K, A, B, C](keyClass: Class[K], a: Read[A], b: Read[B], c: Read[C])(implicit arg0: Coder[K], arg1: Coder[A], arg2: Coder[B], arg3: Coder[C]): SCollection[(K, (Iterable[A], Iterable[B], Iterable[C]))]
targetParallelism defaults to TargetParallelism.auto()
targetParallelism defaults to TargetParallelism.auto()
Annotations
@experimental()
def sortMergeCoGroup[K, A, B, C](keyClass: Class[K], a: Read[A], b: Read[B], c: Read[C], targetParallelism: TargetParallelism)(implicit arg0: Coder[K], arg1: Coder[A], arg2: Coder[B], arg3: Coder[C]): SCollection[(K, (Iterable[A], Iterable[B], Iterable[C]))]
For each key K in a or b or c, return a resulting SCollection that contains a tuple with the list of values for that key in a, b and c.
For each key K in a or b or c, return a resulting SCollection that contains a tuple with the list of values for that key in a, b and c.
See note on SortedBucketScioContext.sortMergeJoin for information on how an SMB cogroup differs from a regular org.apache.beam.sdk.transforms.join.CoGroupByKey operation.
keyClass
cogroup key class. Must have a Coder in Beam's default org.apache.beam.sdk.coders.CoderRegistry as custom key coders are not supported yet.
targetParallelism
the desired parallelism of the job. See org.apache.beam.sdk.extensions.smb.TargetParallelism for more information.
Annotations
@experimental()
def sortMergeCoGroup[K1, K2, A, B](keyClass: Class[K1], keyClassSecondary: Class[K2], a: Read[A], b: Read[B])(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[A], arg3: Coder[B]): SCollection[((K1, K2), (Iterable[A], Iterable[B]))]
targetParallelism defaults to TargetParallelism.auto()
targetParallelism defaults to TargetParallelism.auto()
Annotations
@experimental()
def sortMergeCoGroup[K1, K2, A, B](keyClass: Class[K1], keyClassSecondary: Class[K2], a: Read[A], b: Read[B], targetParallelism: TargetParallelism)(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[A], arg3: Coder[B]): SCollection[((K1, K2), (Iterable[A], Iterable[B]))]
Secondary keyed variant.
Secondary keyed variant.
Annotations
@experimental()
def sortMergeCoGroup[K, A, B](keyClass: Class[K], a: Read[A], b: Read[B])(implicit arg0: Coder[K], arg1: Coder[A], arg2: Coder[B]): SCollection[(K, (Iterable[A], Iterable[B]))]
targetParallelism defaults to TargetParallelism.auto()
def sortMergeCoGroup[K, A, B](keyClass: Class[K], a: Read[A], b: Read[B], targetParallelism: TargetParallelism = TargetParallelism.auto())(implicit arg0: Coder[K], arg1: Coder[A], arg2: Coder[B]): SCollection[(K, (Iterable[A], Iterable[B]))]
For each key K in a or b, return a resulting SCollection that contains a tuple with the list of values for that key in a and b.
For each key K in a or b, return a resulting SCollection that contains a tuple with the list of values for that key in a and b.
See note on SortedBucketScioContext.sortMergeJoin for information on how an SMB cogroup differs from a regular org.apache.beam.sdk.transforms.join.CoGroupByKey operation.
keyClass
cogroup key class. Must have a Coder in Beam's default org.apache.beam.sdk.coders.CoderRegistry as custom key coders are not supported yet.
targetParallelism
the desired parallelism of the job. See org.apache.beam.sdk.extensions.smb.TargetParallelism for more information.
Annotations
@experimental()
def sortMergeGroupByKey[K1, K2, V](keyClass: Class[K1], keyClassSecondary: Class[K2], read: Read[V])(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[V]): SCollection[((K1, K2), Iterable[V])]
targetParallelism defaults to TargetParallelism.auto()
targetParallelism defaults to TargetParallelism.auto()
Annotations
@experimental()
def sortMergeGroupByKey[K1, K2, V](keyClass: Class[K1], keyClassSecondary: Class[K2], read: Read[V], targetParallelism: TargetParallelism)(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[V]): SCollection[((K1, K2), Iterable[V])]
Secondary keyed variant.
Secondary keyed variant.
Annotations
@experimental()
def sortMergeGroupByKey[K, V](keyClass: Class[K], read: Read[V], targetParallelism: TargetParallelism = TargetParallelism.auto())(implicit arg0: Coder[K], arg1: Coder[V]): SCollection[(K, Iterable[V])]
For each key K in read return a resulting SCollection that contains a tuple with the list of values for that key in read.
For each key K in read return a resulting SCollection that contains a tuple with the list of values for that key in read.
See note on SortedBucketScioContext.sortMergeJoin for information on how an SMB group differs from a regular org.apache.beam.sdk.transforms.GroupByKey operation.
keyClass
group key class. Must have a Coder in Beam's default org.apache.beam.sdk.coders.CoderRegistry as custom key coders are not supported yet.
targetParallelism
the desired parallelism of the job. See org.apache.beam.sdk.extensions.smb.TargetParallelism for more information.
Annotations
@experimental()
def sortMergeJoin[K1, K2, L, R](keyClass: Class[K1], keyClassSecondary: Class[K2], lhs: Read[L], rhs: Read[R])(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[L], arg3: Coder[R]): SCollection[((K1, K2), (L, R))]
targetParallelism defaults to TargetParallelism.auto()
targetParallelism defaults to TargetParallelism.auto()
Annotations
@experimental()
def sortMergeJoin[K1, K2, L, R](keyClass: Class[K1], keyClassSecondary: Class[K2], lhs: Read[L], rhs: Read[R], targetParallelism: TargetParallelism)(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[L], arg3: Coder[R]): SCollection[((K1, K2), (L, R))]
Secondary keyed variant.
Secondary keyed variant.
Annotations
@experimental()
def sortMergeJoin[K, L, R](keyClass: Class[K], lhs: Read[L], rhs: Read[R], targetParallelism: TargetParallelism = TargetParallelism.auto())(implicit arg0: Coder[K], arg1: Coder[L], arg2: Coder[R]): SCollection[(K, (L, R))]
Return an SCollection containing all pairs of elements with matching keys in lhs and rhs.
Return an SCollection containing all pairs of elements with matching keys in lhs and rhs. Each pair of elements will be returned as a (k, (v1, v2)) tuple, where (k, v1) is in lhs and (k, v2) is in rhs.
Unlike a regular PairSCollectionFunctions.join, the key information (namely, how to extract a comparable K from L and R) is remotely encoded in an org.apache.beam.sdk.extensions.smb.BucketMetadata file in the same directory as the input records. This transform requires a filesystem lookup to ensure that the metadata for each source are compatible. In return for reading pre-sorted data, the shuffle step in a typical org.apache.beam.sdk.transforms.GroupByKey operation can be eliminated.
keyClass
join key class. Must have a Coder in Beam's default org.apache.beam.sdk.coders.CoderRegistry as custom key coders are not supported yet.
targetParallelism
the desired parallelism of the job. See org.apache.beam.sdk.extensions.smb.TargetParallelism for more information.
Annotations
@experimental()
def sortMergeTransform[K1, K2, A, B, C](keyClass: Class[K1], keyClassSecondary: Class[K2], a: Read[A], b: Read[B], c: Read[C])(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[A], arg3: Coder[B], arg4: Coder[C]): ReadBuilder[KV[K1, K2], K1, K2, (Iterable[A], Iterable[B], Iterable[C])]
targetParallelism defaults to TargetParallelism.auto()
targetParallelism defaults to TargetParallelism.auto()
Annotations
@experimental()
def sortMergeTransform[K1, K2, A, B, C](keyClass: Class[K1], keyClassSecondary: Class[K2], a: Read[A], b: Read[B], c: Read[C], targetParallelism: TargetParallelism)(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[A], arg3: Coder[B], arg4: Coder[C]): ReadBuilder[KV[K1, K2], K1, K2, (Iterable[A], Iterable[B], Iterable[C])]
Secondary keyed variant
Secondary keyed variant
Annotations
@experimental()
def sortMergeTransform[K, A, B, C](keyClass: Class[K], a: Read[A], b: Read[B], c: Read[C])(implicit arg0: Coder[K], arg1: Coder[A], arg2: Coder[B], arg3: Coder[C]): ReadBuilder[K, K, Void, (Iterable[A], Iterable[B], Iterable[C])]
targetParallelism defaults to TargetParallelism.auto()
targetParallelism defaults to TargetParallelism.auto()
Annotations
@experimental()
def sortMergeTransform[K, A, B, C](keyClass: Class[K], a: Read[A], b: Read[B], c: Read[C], targetParallelism: TargetParallelism)(implicit arg0: Coder[K], arg1: Coder[A], arg2: Coder[B], arg3: Coder[C]): ReadBuilder[K, K, Void, (Iterable[A], Iterable[B], Iterable[C])]
Perform a 3-way SortedBucketScioContext.sortMergeCoGroup operation, then immediately apply a transformation function to the merged cogroups and re-write using the same bucketing key and hashing scheme.
Perform a 3-way SortedBucketScioContext.sortMergeCoGroup operation, then immediately apply a transformation function to the merged cogroups and re-write using the same bucketing key and hashing scheme. By applying the read, transform, and write in the same transform, an extra shuffle step can be avoided.
Annotations
@experimental()
def sortMergeTransform[K1, K2, A, B](keyClass: Class[K1], keyClassSecondary: Class[K2], a: Read[A], b: Read[B])(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[A], arg3: Coder[B]): ReadBuilder[KV[K1, K2], K1, K2, (Iterable[A], Iterable[B])]
targetParallelism defaults to TargetParallelism.auto()
targetParallelism defaults to TargetParallelism.auto()
Annotations
@experimental()
def sortMergeTransform[K1, K2, A, B](keyClass: Class[K1], keyClassSecondary: Class[K2], a: Read[A], b: Read[B], targetParallelism: TargetParallelism)(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[A], arg3: Coder[B]): ReadBuilder[KV[K1, K2], K1, K2, (Iterable[A], Iterable[B])]
Secondary keyed variant
Secondary keyed variant
Annotations
@experimental()
def sortMergeTransform[K, A, B](keyClass: Class[K], a: Read[A], b: Read[B])(implicit arg0: Coder[K], arg1: Coder[A], arg2: Coder[B]): ReadBuilder[K, K, Void, (Iterable[A], Iterable[B])]
targetParallelism defaults to TargetParallelism.auto()
targetParallelism defaults to TargetParallelism.auto()
Annotations
@experimental()
def sortMergeTransform[K, A, B](keyClass: Class[K], a: Read[A], b: Read[B], targetParallelism: TargetParallelism)(implicit arg0: Coder[K], arg1: Coder[A], arg2: Coder[B]): ReadBuilder[K, K, Void, (Iterable[A], Iterable[B])]
Perform a 2-way SortedBucketScioContext.sortMergeCoGroup operation, then immediately apply a transformation function to the merged cogroups and re-write using the same bucketing key and hashing scheme.
Perform a 2-way SortedBucketScioContext.sortMergeCoGroup operation, then immediately apply a transformation function to the merged cogroups and re-write using the same bucketing key and hashing scheme. By applying the read, transform, and write in the same transform, an extra shuffle step can be avoided.
Annotations
@experimental()
def sortMergeTransform[K1, K2, R](keyClass: Class[K1], keyClassSecondary: Class[K2], read: Read[R], targetParallelism: TargetParallelism)(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[R]): ReadBuilder[KV[K1, K2], K1, K2, Iterable[R]]
Secondary keyed variant
Secondary keyed variant
Annotations
@experimental()
def sortMergeTransform[K, R](keyClass: Class[K], read: Read[R]): ReadBuilder[K, K, Void, Iterable[R]]
targetParallelism defaults to TargetParallelism.auto()
targetParallelism defaults to TargetParallelism.auto()
Annotations
@experimental()
def sortMergeTransform[K, R](keyClass: Class[K], read: Read[R], targetParallelism: TargetParallelism): ReadBuilder[K, K, Void, Iterable[R]]
Perform a SortedBucketScioContext.sortMergeGroupByKey operation, then immediately apply a transformation function to the merged groups and re-write using the same bucketing key and hashing scheme.
Perform a SortedBucketScioContext.sortMergeGroupByKey operation, then immediately apply a transformation function to the merged groups and re-write using the same bucketing key and hashing scheme. By applying the read, transform, and write in the same transform, an extra shuffle step can be avoided.
Annotations
@experimental()
final def synchronized[T0](arg0: => T0): T0
Definition Classes
AnyRef
def toString(): String
Definition Classes
AnyRef → Any
final def wait(): Unit
Definition Classes
AnyRef
Annotations
@throws(classOf[java.lang.InterruptedException])
final def wait(arg0: Long, arg1: Int): Unit
Definition Classes
AnyRef
Annotations
@throws(classOf[java.lang.InterruptedException])
final def wait(arg0: Long): Unit
Definition Classes
AnyRef
Annotations
@throws(classOf[java.lang.InterruptedException]) @native()

Packages

SortedBucketScioContext

final class SortedBucketScioContext extends Serializable

Instance Constructors

Deprecated Type Members

Value Members

Inherited from Serializable

Inherited from AnyRef

Inherited from Any

cogroup

join

per_key

Ungrouped

Packages

SortedBucketScioContext

final class SortedBucketScioContext extends Serializable

Instance Constructors

Deprecated Type Members

Value Members

Inherited from Serializable

Inherited from AnyRef

Inherited from Any

cogroup

join

per_key

Ungrouped

SortedBucketScioContext