final class SortedBucketScioContext extends Serializable
- Alphabetic
- By Inheritance
- SortedBucketScioContext
- Serializable
- AnyRef
- Any
- Hide All
- Show All
- Public
- Protected
Instance Constructors
- new SortedBucketScioContext(self: ScioContext)
Deprecated Type Members
- type SortMergeTransformReadBuilder[KeyType, K1, K2, R] = ReadBuilder[KeyType, K1, K2, R]
- Annotations
- @deprecated
- Deprecated
(Since version 0.14.0) Use SortMergeTransform.ReadBuilder instead
- type SortMergeTransformWithSideInputsWriteBuilder[KeyType, R, W] = WithSideInputsWriteBuilder[KeyType, R, W]
- Annotations
- @deprecated
- Deprecated
(Since version 0.14.0) Use SortMergeTransform.WithSideInputsWriteBuilder instead
- type SortMergeTransformWriteBuilder[KeyType, R, W] = WriteBuilder[KeyType, R, W]
- Annotations
- @deprecated
- Deprecated
(Since version 0.14.0) Use SortMergeTransform.WriteBuilder instead
Value Members
- final def !=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
- final def ##: Int
- Definition Classes
- AnyRef → Any
- final def ==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
- final def asInstanceOf[T0]: T0
- Definition Classes
- Any
- def clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.CloneNotSupportedException]) @native()
- final def eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
- def equals(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef → Any
- def finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.Throwable])
- final def getClass(): Class[_ <: AnyRef]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
- def hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
- final def isInstanceOf[T0]: Boolean
- Definition Classes
- Any
- final def ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
- final def notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
- final def notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
- def sortMergeCoGroup[K1, K2, A, B, C, D](keyClass: Class[K1], keyClassSecondary: Class[K2], a: Read[A], b: Read[B], c: Read[C], d: Read[D])(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[A], arg3: Coder[B], arg4: Coder[C], arg5: Coder[D]): SCollection[((K1, K2), (Iterable[A], Iterable[B], Iterable[C], Iterable[D]))]
`targetParallelism` defaults to `TargetParallelism.auto()`
`targetParallelism` defaults to `TargetParallelism.auto()`
- Annotations
- @experimental()
- def sortMergeCoGroup[K1, K2, A, B, C, D](keyClass: Class[K1], keyClassSecondary: Class[K2], a: Read[A], b: Read[B], c: Read[C], d: Read[D], targetParallelism: TargetParallelism)(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[A], arg3: Coder[B], arg4: Coder[C], arg5: Coder[D]): SCollection[((K1, K2), (Iterable[A], Iterable[B], Iterable[C], Iterable[D]))]
Secondary keyed variant
Secondary keyed variant
- Annotations
- @experimental()
- def sortMergeCoGroup[K, A, B, C, D](keyClass: Class[K], a: Read[A], b: Read[B], c: Read[C], d: Read[D])(implicit arg0: Coder[K], arg1: Coder[A], arg2: Coder[B], arg3: Coder[C], arg4: Coder[D]): SCollection[(K, (Iterable[A], Iterable[B], Iterable[C], Iterable[D]))]
`targetParallelism` defaults to `TargetParallelism.auto()`
`targetParallelism` defaults to `TargetParallelism.auto()`
- Annotations
- @experimental()
- def sortMergeCoGroup[K, A, B, C, D](keyClass: Class[K], a: Read[A], b: Read[B], c: Read[C], d: Read[D], targetParallelism: TargetParallelism)(implicit arg0: Coder[K], arg1: Coder[A], arg2: Coder[B], arg3: Coder[C], arg4: Coder[D]): SCollection[(K, (Iterable[A], Iterable[B], Iterable[C], Iterable[D]))]
For each key K in `a` or `b` or `c` or `d`, return a resulting SCollection that contains a tuple with the list of values for that key in `a`, `b`, `c` and `d`.
For each key K in `a` or `b` or `c` or `d`, return a resulting SCollection that contains a tuple with the list of values for that key in `a`, `b`, `c` and `d`. See note on SortedBucketScioContext.sortMergeJoin for information on how an SMB cogroup differs from a regular org.apache.beam.sdk.transforms.join.CoGroupByKey operation.
- keyClass
cogroup key class. Must have a Coder in Beam's default org.apache.beam.sdk.coders.CoderRegistry as custom key coders are not supported yet.
- targetParallelism
the desired parallelism of the job. See org.apache.beam.sdk.extensions.smb.TargetParallelism for more information.
- Annotations
- @experimental()
- def sortMergeCoGroup[K1, K2, A, B, C](keyClass: Class[K1], keyClassSecondary: Class[K2], a: Read[A], b: Read[B], c: Read[C])(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[A], arg3: Coder[B], arg4: Coder[C]): SCollection[((K1, K2), (Iterable[A], Iterable[B], Iterable[C]))]
`targetParallelism` defaults to `TargetParallelism.auto()`
`targetParallelism` defaults to `TargetParallelism.auto()`
- Annotations
- @experimental()
- def sortMergeCoGroup[K1, K2, A, B, C](keyClass: Class[K1], keyClassSecondary: Class[K2], a: Read[A], b: Read[B], c: Read[C], targetParallelism: TargetParallelism)(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[A], arg3: Coder[B], arg4: Coder[C]): SCollection[((K1, K2), (Iterable[A], Iterable[B], Iterable[C]))]
Secondary keyed variant
Secondary keyed variant
- Annotations
- @experimental()
- def sortMergeCoGroup[K, A, B, C](keyClass: Class[K], a: Read[A], b: Read[B], c: Read[C])(implicit arg0: Coder[K], arg1: Coder[A], arg2: Coder[B], arg3: Coder[C]): SCollection[(K, (Iterable[A], Iterable[B], Iterable[C]))]
`targetParallelism` defaults to `TargetParallelism.auto()`
`targetParallelism` defaults to `TargetParallelism.auto()`
- Annotations
- @experimental()
- def sortMergeCoGroup[K, A, B, C](keyClass: Class[K], a: Read[A], b: Read[B], c: Read[C], targetParallelism: TargetParallelism)(implicit arg0: Coder[K], arg1: Coder[A], arg2: Coder[B], arg3: Coder[C]): SCollection[(K, (Iterable[A], Iterable[B], Iterable[C]))]
For each key K in `a` or `b` or `c`, return a resulting SCollection that contains a tuple with the list of values for that key in `a`, `b` and `c`.
For each key K in `a` or `b` or `c`, return a resulting SCollection that contains a tuple with the list of values for that key in `a`, `b` and `c`. See note on SortedBucketScioContext.sortMergeJoin for information on how an SMB cogroup differs from a regular org.apache.beam.sdk.transforms.join.CoGroupByKey operation.
- keyClass
cogroup key class. Must have a Coder in Beam's default org.apache.beam.sdk.coders.CoderRegistry as custom key coders are not supported yet.
- targetParallelism
the desired parallelism of the job. See org.apache.beam.sdk.extensions.smb.TargetParallelism for more information.
- Annotations
- @experimental()
- def sortMergeCoGroup[K1, K2, A, B](keyClass: Class[K1], keyClassSecondary: Class[K2], a: Read[A], b: Read[B])(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[A], arg3: Coder[B]): SCollection[((K1, K2), (Iterable[A], Iterable[B]))]
`targetParallelism` defaults to `TargetParallelism.auto()`
`targetParallelism` defaults to `TargetParallelism.auto()`
- Annotations
- @experimental()
- def sortMergeCoGroup[K1, K2, A, B](keyClass: Class[K1], keyClassSecondary: Class[K2], a: Read[A], b: Read[B], targetParallelism: TargetParallelism)(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[A], arg3: Coder[B]): SCollection[((K1, K2), (Iterable[A], Iterable[B]))]
Secondary keyed variant.
Secondary keyed variant.
- Annotations
- @experimental()
- def sortMergeCoGroup[K, A, B](keyClass: Class[K], a: Read[A], b: Read[B])(implicit arg0: Coder[K], arg1: Coder[A], arg2: Coder[B]): SCollection[(K, (Iterable[A], Iterable[B]))]
`targetParallelism` defaults to `TargetParallelism.auto()`
- def sortMergeCoGroup[K, A, B](keyClass: Class[K], a: Read[A], b: Read[B], targetParallelism: TargetParallelism = TargetParallelism.auto())(implicit arg0: Coder[K], arg1: Coder[A], arg2: Coder[B]): SCollection[(K, (Iterable[A], Iterable[B]))]
For each key K in `a` or `b`, return a resulting SCollection that contains a tuple with the list of values for that key in `a` and `b`.
For each key K in `a` or `b`, return a resulting SCollection that contains a tuple with the list of values for that key in `a` and `b`. See note on SortedBucketScioContext.sortMergeJoin for information on how an SMB cogroup differs from a regular org.apache.beam.sdk.transforms.join.CoGroupByKey operation.
- keyClass
cogroup key class. Must have a Coder in Beam's default org.apache.beam.sdk.coders.CoderRegistry as custom key coders are not supported yet.
- targetParallelism
the desired parallelism of the job. See org.apache.beam.sdk.extensions.smb.TargetParallelism for more information.
- Annotations
- @experimental()
- def sortMergeGroupByKey[K1, K2, V](keyClass: Class[K1], keyClassSecondary: Class[K2], read: Read[V])(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[V]): SCollection[((K1, K2), Iterable[V])]
`targetParallelism` defaults to `TargetParallelism.auto()`
`targetParallelism` defaults to `TargetParallelism.auto()`
- Annotations
- @experimental()
- def sortMergeGroupByKey[K1, K2, V](keyClass: Class[K1], keyClassSecondary: Class[K2], read: Read[V], targetParallelism: TargetParallelism)(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[V]): SCollection[((K1, K2), Iterable[V])]
Secondary keyed variant.
Secondary keyed variant.
- Annotations
- @experimental()
- def sortMergeGroupByKey[K, V](keyClass: Class[K], read: Read[V], targetParallelism: TargetParallelism = TargetParallelism.auto())(implicit arg0: Coder[K], arg1: Coder[V]): SCollection[(K, Iterable[V])]
For each key K in `read`, return a resulting SCollection that contains a tuple with the list of values for that key in `read`.
For each key K in `read`, return a resulting SCollection that contains a tuple with the list of values for that key in `read`. See note on SortedBucketScioContext.sortMergeJoin for information on how an SMB group differs from a regular org.apache.beam.sdk.transforms.GroupByKey operation.
- keyClass
cogroup key class. Must have a Coder in Beam's default org.apache.beam.sdk.coders.CoderRegistry as custom key coders are not supported yet.
- targetParallelism
the desired parallelism of the job. See org.apache.beam.sdk.extensions.smb.TargetParallelism for more information.
- Annotations
- @experimental()
- def sortMergeJoin[K1, K2, L, R](keyClass: Class[K1], keyClassSecondary: Class[K2], lhs: Read[L], rhs: Read[R])(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[L], arg3: Coder[R]): SCollection[((K1, K2), (L, R))]
`targetParallelism` defaults to `TargetParallelism.auto()`
`targetParallelism` defaults to `TargetParallelism.auto()`
- Annotations
- @experimental()
- def sortMergeJoin[K1, K2, L, R](keyClass: Class[K1], keyClassSecondary: Class[K2], lhs: Read[L], rhs: Read[R], targetParallelism: TargetParallelism)(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[L], arg3: Coder[R]): SCollection[((K1, K2), (L, R))]
Secondary keyed variant.
Secondary keyed variant.
- Annotations
- @experimental()
- def sortMergeJoin[K, L, R](keyClass: Class[K], lhs: Read[L], rhs: Read[R], targetParallelism: TargetParallelism = TargetParallelism.auto())(implicit arg0: Coder[K], arg1: Coder[L], arg2: Coder[R]): SCollection[(K, (L, R))]
Return an SCollection containing all pairs of elements with matching keys in `lhs` and `rhs`.
Return an SCollection containing all pairs of elements with matching keys in `lhs` and `rhs`. Each pair of elements will be returned as a (k, (v1, v2)) tuple, where (k, v1) is in `lhs` and (k, v2) is in `rhs`.
Unlike a regular PairSCollectionFunctions.join, the key information (namely, how to extract a comparable `K` from `L` and `R`) is remotely encoded in an org.apache.beam.sdk.extensions.smb.BucketMetadata file in the same directory as the input records. This transform requires a filesystem lookup to ensure that the metadata for each source are compatible. In return for reading pre-sorted data, the shuffle step in a typical org.apache.beam.sdk.transforms.GroupByKey operation can be eliminated.
- keyClass
join key class. Must have a Coder in Beam's default org.apache.beam.sdk.coders.CoderRegistry as custom key coders are not supported yet.
- targetParallelism
the desired parallelism of the job. See org.apache.beam.sdk.extensions.smb.TargetParallelism for more information.
- Annotations
- @experimental()
- def sortMergeTransform[K1, K2, A, B, C](keyClass: Class[K1], keyClassSecondary: Class[K2], a: Read[A], b: Read[B], c: Read[C])(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[A], arg3: Coder[B], arg4: Coder[C]): ReadBuilder[KV[K1, K2], K1, K2, (Iterable[A], Iterable[B], Iterable[C])]
`targetParallelism` defaults to `TargetParallelism.auto()`
`targetParallelism` defaults to `TargetParallelism.auto()`
- Annotations
- @experimental()
- def sortMergeTransform[K1, K2, A, B, C](keyClass: Class[K1], keyClassSecondary: Class[K2], a: Read[A], b: Read[B], c: Read[C], targetParallelism: TargetParallelism)(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[A], arg3: Coder[B], arg4: Coder[C]): ReadBuilder[KV[K1, K2], K1, K2, (Iterable[A], Iterable[B], Iterable[C])]
Secondary keyed variant
Secondary keyed variant
- Annotations
- @experimental()
- def sortMergeTransform[K, A, B, C](keyClass: Class[K], a: Read[A], b: Read[B], c: Read[C])(implicit arg0: Coder[K], arg1: Coder[A], arg2: Coder[B], arg3: Coder[C]): ReadBuilder[K, K, Void, (Iterable[A], Iterable[B], Iterable[C])]
`targetParallelism` defaults to `TargetParallelism.auto()`
`targetParallelism` defaults to `TargetParallelism.auto()`
- Annotations
- @experimental()
- def sortMergeTransform[K, A, B, C](keyClass: Class[K], a: Read[A], b: Read[B], c: Read[C], targetParallelism: TargetParallelism)(implicit arg0: Coder[K], arg1: Coder[A], arg2: Coder[B], arg3: Coder[C]): ReadBuilder[K, K, Void, (Iterable[A], Iterable[B], Iterable[C])]
Perform a 3-way SortedBucketScioContext.sortMergeCoGroup operation, then immediately apply a transformation function to the merged cogroups and re-write using the same bucketing key and hashing scheme.
Perform a 3-way SortedBucketScioContext.sortMergeCoGroup operation, then immediately apply a transformation function to the merged cogroups and re-write using the same bucketing key and hashing scheme. By applying the read, transform, and write in the same transform, an extra shuffle step can be avoided.
- Annotations
- @experimental()
- def sortMergeTransform[K1, K2, A, B](keyClass: Class[K1], keyClassSecondary: Class[K2], a: Read[A], b: Read[B])(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[A], arg3: Coder[B]): ReadBuilder[KV[K1, K2], K1, K2, (Iterable[A], Iterable[B])]
`targetParallelism` defaults to `TargetParallelism.auto()`
`targetParallelism` defaults to `TargetParallelism.auto()`
- Annotations
- @experimental()
- def sortMergeTransform[K1, K2, A, B](keyClass: Class[K1], keyClassSecondary: Class[K2], a: Read[A], b: Read[B], targetParallelism: TargetParallelism)(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[A], arg3: Coder[B]): ReadBuilder[KV[K1, K2], K1, K2, (Iterable[A], Iterable[B])]
Secondary keyed variant
Secondary keyed variant
- Annotations
- @experimental()
- def sortMergeTransform[K, A, B](keyClass: Class[K], a: Read[A], b: Read[B])(implicit arg0: Coder[K], arg1: Coder[A], arg2: Coder[B]): ReadBuilder[K, K, Void, (Iterable[A], Iterable[B])]
`targetParallelism` defaults to `TargetParallelism.auto()`
`targetParallelism` defaults to `TargetParallelism.auto()`
- Annotations
- @experimental()
- def sortMergeTransform[K, A, B](keyClass: Class[K], a: Read[A], b: Read[B], targetParallelism: TargetParallelism)(implicit arg0: Coder[K], arg1: Coder[A], arg2: Coder[B]): ReadBuilder[K, K, Void, (Iterable[A], Iterable[B])]
Perform a 2-way SortedBucketScioContext.sortMergeCoGroup operation, then immediately apply a transformation function to the merged cogroups and re-write using the same bucketing key and hashing scheme.
Perform a 2-way SortedBucketScioContext.sortMergeCoGroup operation, then immediately apply a transformation function to the merged cogroups and re-write using the same bucketing key and hashing scheme. By applying the read, transform, and write in the same transform, an extra shuffle step can be avoided.
- Annotations
- @experimental()
- def sortMergeTransform[K1, K2, R](keyClass: Class[K1], keyClassSecondary: Class[K2], read: Read[R], targetParallelism: TargetParallelism)(implicit arg0: Coder[K1], arg1: Coder[K2], arg2: Coder[R]): ReadBuilder[KV[K1, K2], K1, K2, Iterable[R]]
Secondary keyed variant
Secondary keyed variant
- Annotations
- @experimental()
- def sortMergeTransform[K, R](keyClass: Class[K], read: Read[R]): ReadBuilder[K, K, Void, Iterable[R]]
`targetParallelism` defaults to `TargetParallelism.auto()`
`targetParallelism` defaults to `TargetParallelism.auto()`
- Annotations
- @experimental()
- def sortMergeTransform[K, R](keyClass: Class[K], read: Read[R], targetParallelism: TargetParallelism): ReadBuilder[K, K, Void, Iterable[R]]
Perform a SortedBucketScioContext.sortMergeGroupByKey operation, then immediately apply a transformation function to the merged groups and re-write using the same bucketing key and hashing scheme.
Perform a SortedBucketScioContext.sortMergeGroupByKey operation, then immediately apply a transformation function to the merged groups and re-write using the same bucketing key and hashing scheme. By applying the read, transform, and write in the same transform, an extra shuffle step can be avoided.
- Annotations
- @experimental()
- final def synchronized[T0](arg0: => T0): T0
- Definition Classes
- AnyRef
- def toString(): String
- Definition Classes
- AnyRef → Any
- final def wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.InterruptedException])
- final def wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.InterruptedException])
- final def wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.InterruptedException]) @native()