Skip to content

Instantly share code, notes, and snippets.

@cipri7329
Last active October 19, 2016 13:45
Show Gist options
  • Save cipri7329/5f41b1f4aafe884802fe7b0e137e40c1 to your computer and use it in GitHub Desktop.
Save cipri7329/5f41b1f4aafe884802fe7b0e137e40c1 to your computer and use it in GitHub Desktop.
flume spooldir hdfs
# Components of the "wikiagent" Flume agent: one source, one sink, one channel.
# Each name declared here is configured under wikiagent.<kind>.<name>.* below.
wikiagent.channels = memChannel
wikiagent.sources = spool
wikiagent.sinks = HDFS
# Source config: spooling-directory source that ingests files dropped into
# spoolDir and hands the events to memChannel.
wikiagent.sources.spool.type = spooldir
wikiagent.sources.spool.channels = memChannel
wikiagent.sources.spool.spoolDir = /home/ubuntu/datalake/processed
# Number of events transferred to the channel per batch; must not exceed the
# channel's transactionCapacity.
wikiagent.sources.spool.batchSize = 150
# Skip files that are still being written (names ending in ".loading").
# The pattern is a Java regex matched against the whole file name. The previous
# value "^*.loading$" quantified the "^" anchor instead of "any characters", so
# names such as "foo.loading" were not ignored. "[.]" is used for the literal
# dot to avoid backslash escaping in a .properties file.
wikiagent.sources.spool.ignorePattern = ^.*[.]loading$
# Keep fully ingested files in place; they are renamed with the suffix below.
wikiagent.sources.spool.deletePolicy = never
wikiagent.sources.spool.fileSuffix = .COMPLETED
# Attach the originating file's path to every event under the header key "file".
wikiagent.sources.spool.fileHeader = true
wikiagent.sources.spool.fileHeaderKey = file
# One event per line of input text.
wikiagent.sources.spool.deserializer = line
# Channel config: in-memory buffer between the spool source and the HDFS sink.
# NOTE(review): a memory channel does not persist events, so anything buffered
# here is lost if the agent stops; confirm that is acceptable for this pipeline.
wikiagent.channels.memChannel.type = memory
# At most 500 events per transaction, 2000 events buffered in total.
wikiagent.channels.memChannel.transactionCapacity = 500
wikiagent.channels.memChannel.capacity = 2000
# Sink config: drains memChannel and writes the events to HDFS as plain text.
wikiagent.sinks.HDFS.type = hdfs
# The sink reads its events from this channel.
wikiagent.sinks.HDFS.channel = memChannel

# Output location and format: uncompressed stream, events serialized as text.
wikiagent.sinks.HDFS.hdfs.path = hdfs://localhost:8020/user/datalake/wiki-flume
wikiagent.sinks.HDFS.hdfs.fileType = DataStream
wikiagent.sinks.HDFS.serializer = TEXT

# File naming: wiki<...>.csv; files still being written carry a leading "."
wikiagent.sinks.HDFS.hdfs.filePrefix = wiki
wikiagent.sinks.HDFS.hdfs.fileSuffix = .csv
wikiagent.sinks.HDFS.hdfs.inUsePrefix = .
# Use the agent's local time (not an event header) for time escape sequences.
# NOTE(review): no escape sequences appear in the path or prefix above, so this
# setting currently looks like it has no visible effect.
wikiagent.sinks.HDFS.hdfs.useLocalTimeStamp = true

# Number of events written to a file before it is flushed to HDFS.
wikiagent.sinks.HDFS.hdfs.batchSize = 500

# Rolling: 0 disables rolling by time, by size and by event count, so a file
# is only closed after it has been inactive for idleTimeout seconds.
wikiagent.sinks.HDFS.hdfs.rollInterval = 0
wikiagent.sinks.HDFS.hdfs.rollSize = 0
wikiagent.sinks.HDFS.hdfs.rollCount = 0
wikiagent.sinks.HDFS.hdfs.idleTimeout = 60

# Milliseconds allowed for HDFS operations (open, write, flush, close).
wikiagent.sinks.HDFS.hdfs.callTimeout = 25000
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment