Source code for pyschema_extensions.luigi

# Copyright (c) 2013 Spotify AB
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.

"""Basic utilities for using PySchema in Luigi's Python MR lib
"""
import sys
from pyschema import core


[docs]def mr_reader(job, input_stream, loads=core.loads): """ Converts a file object with json serialised pyschema records to a stream of pyschema objects Can be used as job.reader in luigi.hadoop.JobTask """ for line in input_stream: yield loads(line),
[docs]def mr_writer(job, outputs, output_stream, stderr=sys.stderr, dumps=core.dumps): """ Writes a stream of json serialised pyschema Records to a file object Can be used as job.writer in luigi.hadoop.JobTask """ for output in outputs: try: print >> output_stream, dumps(output) except core.ParseError, e: print >> stderr, e raise