Skip to content

Instantly share code, notes, and snippets.

@arthurpham
Last active February 6, 2018 16:59
Show Gist options
  • Save arthurpham/88c12ce71664c90095a0ea0ec0212bac to your computer and use it in GitHub Desktop.
Save arthurpham/88c12ce71664c90095a0ea0ec0212bac to your computer and use it in GitHub Desktop.
# For Rancher
# This compose file stands up Scrapy Cluster with an
# associated ELK Stack. You should run a few crawls and then import the
# `export.json` file into your Kibana objects.
# Logstash container paired with `crawler`: it runs with the config file
# /etc/logstash/conf.d/logstash.conf and mounts the volumes of `crawler`.
logstash-crawler:
  image: arthurpham/logstash-scrapy-cluster-docker:latest
  command:
    - -f
    - /etc/logstash/conf.d/logstash.conf
  labels:
    io.rancher.container.pull_image: always
    # Soft anti-affinity against containers of this same stack/service.
    # A single `$$` escape is used (same form as the kafka and elasticsearch
    # services) so that a literal `${stack_name}/${service_name}` reaches
    # Rancher; a doubled `$$$$` escape would leave `$${...}` behind.
    io.rancher.scheduler.affinity:container_label_soft_ne: 'io.rancher.stack_service.name=$${stack_name}/$${service_name}'
    io.rancher.scheduler.affinity:host_label: role=crawler
  links:
    - 'kafka:'
    - 'redis:'
    - 'zookeeper:'
    - 'elasticsearch:'
  # Shares the volumes declared by `crawler` (its /var/log/scrapy-cluster).
  volumes_from:
    - crawler
# Redis service; `/data` is backed by the `redisdata` named volume.
redis:
  image: redis:latest
  volumes:
    - redisdata:/data
  labels:
    # Host-label scheduling rule: role=redis.
    io.rancher.scheduler.affinity:host_label: role=redis
# Scrapy Cluster Kafka monitor. Declares `logstash-kafka-monitor` as its
# sidekick and exposes /var/log/scrapy-cluster as a volume.
kafka-monitor:
  image: istresearch/scrapy-cluster:kafka-monitor-dev-alpine
  environment:
    # Values are quoted so they reach the container as strings.
    LOG_DIR: /var/log/scrapy-cluster
    LOG_JSON: 'True'
    LOG_STDOUT: 'False'
    REDIS_PORT: '6379'
  links:
    - 'kafka:'
    - 'redis:'
    - 'elasticsearch:'
  volumes:
    # Anonymous volume at the same path as LOG_DIR.
    - /var/log/scrapy-cluster
  labels:
    io.rancher.sidekicks: logstash-kafka-monitor
    io.rancher.scheduler.affinity:host_label: role=kafka_zookeeper
# Logstash container paired with `redis-monitor`: it runs with the config
# file /etc/logstash/conf.d/logstash.conf and mounts that service's volumes.
logstash-redis-monitor:
  image: arthurpham/logstash-scrapy-cluster-docker:latest
  command:
    - -f
    - /etc/logstash/conf.d/logstash.conf
  links:
    - 'kafka:'
    - 'redis:'
    - 'zookeeper:'
    - 'elasticsearch:'
  # Shares the volumes declared by `redis-monitor`.
  volumes_from:
    - redis-monitor
  labels:
    io.rancher.container.pull_image: always
    io.rancher.scheduler.affinity:host_label: role=redis
# ZooKeeper service; `kafka` points at it via KAFKA_ZOOKEEPER_CONNECT.
zookeeper:
  image: wurstmeister/zookeeper:latest
  labels:
    # Host-label scheduling rule: role=kafka_zookeeper.
    io.rancher.scheduler.affinity:host_label: role=kafka_zookeeper
# Logstash container paired with `kafka-monitor`: it runs with the config
# file /etc/logstash/conf.d/logstash.conf and mounts that service's volumes.
logstash-kafka-monitor:
  image: arthurpham/logstash-scrapy-cluster-docker:latest
  command:
    - -f
    - /etc/logstash/conf.d/logstash.conf
  links:
    - 'kafka:'
    - 'redis:'
    - 'elasticsearch:'
  # Shares the volumes declared by `kafka-monitor`.
  volumes_from:
    - kafka-monitor
  labels:
    io.rancher.container.pull_image: always
    io.rancher.scheduler.affinity:host_label: role=kafka_zookeeper
# Kafka broker, linked to `zookeeper` and advertised as kafka:9092.
kafka:
  image: wurstmeister/kafka:latest
  environment:
    KAFKA_ADVERTISED_HOST_NAME: kafka
    KAFKA_ADVERTISED_PORT: '9092'
    KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
    KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: '1'
    # 1073741824 = 1 GiB
    KAFKA_LOG_RETENTION_BYTES: '1073741824'
    # 536870912 = 512 MiB
    KAFKA_LOG_SEGMENT_BYTES: '536870912'
    # NOTE(review): unit is presumably milliseconds; confirm against the image.
    KAFKA_LOG_RETENTION_CHECK_INTERVAL: '60000'
  links:
    - 'zookeeper:'
  volumes:
    # Host Docker socket is mounted into the container.
    - /var/run/docker.sock:/var/run/docker.sock
  labels:
    # Soft anti-affinity against this stack's `elasticsearch` containers.
    io.rancher.scheduler.affinity:container_label_soft_ne: 'io.rancher.stack_service.name=$${stack_name}/elasticsearch'
    io.rancher.scheduler.affinity:host_label: role=kafka_zookeeper
# Kibana UI, published on host port 5601 and linked to `elasticsearch`.
kibana:
  image: kibana:4
  ports:
    # Quoted so the mapping is always read as a string.
    - '5601:5601/tcp'
  environment:
    # Caps the Node.js old-space heap at 200 MB.
    NODE_OPTIONS: '--max-old-space-size=200'
  links:
    - 'elasticsearch:'
  labels:
    io.rancher.container.pull_image: always
    io.rancher.scheduler.affinity:host_label: role=elk
# Scrapy Cluster crawler. Declares `logstash-crawler` as its sidekick and
# exposes /var/log/scrapy-cluster as a volume.
crawler:
  image: istresearch/scrapy-cluster:crawler-dev-alpine
  environment:
    # Values are quoted so they reach the container as strings.
    REDIS_PORT: '6379'
    SC_LOG_DIR: /var/log/scrapy-cluster
    SC_LOG_JSON: 'True'
    SC_LOG_STDOUT: 'False'
    QUEUE_HITS: '20'
    QUEUE_WINDOW: '60'
  labels:
    io.rancher.sidekicks: logstash-crawler
    # Soft anti-affinity against containers of this same stack/service.
    # A single `$$` escape is used (same form as the kafka and elasticsearch
    # services) so that a literal `${stack_name}/${service_name}` reaches
    # Rancher; a doubled `$$$$` escape would leave `$${...}` behind.
    io.rancher.scheduler.affinity:container_label_soft_ne: 'io.rancher.stack_service.name=$${stack_name}/$${service_name}'
    io.rancher.scheduler.affinity:host_label: role=crawler
  links:
    - 'kafka:'
    - 'redis:'
    - 'zookeeper:'
    - 'elasticsearch:'
  volumes:
    # Anonymous volume at the same path as SC_LOG_DIR.
    - /var/log/scrapy-cluster
# Scrapy Cluster Redis monitor. Declares `logstash-redis-monitor` as its
# sidekick and exposes /var/log/scrapy-cluster as a volume.
redis-monitor:
  image: istresearch/scrapy-cluster:redis-monitor-dev-alpine
  environment:
    # Values are quoted so they reach the container as strings.
    LOG_DIR: /var/log/scrapy-cluster
    LOG_JSON: 'True'
    LOG_STDOUT: 'False'
    REDIS_PORT: '6379'
  links:
    - 'kafka:'
    - 'redis:'
    - 'zookeeper:'
    - 'elasticsearch:'
  volumes:
    # Anonymous volume at the same path as LOG_DIR.
    - /var/log/scrapy-cluster
  labels:
    io.rancher.sidekicks: logstash-redis-monitor
    io.rancher.scheduler.affinity:host_label: role=redis
# Elasticsearch node; index data lives on the `esdata` named volume.
elasticsearch:
  image: elasticsearch:2
  command:
    - elasticsearch
  environment:
    # JVM heap fixed at 1 GB (initial and maximum).
    ES_JAVA_OPTS: '-Xms1g -Xmx1g'
    ES_NETWORK_HOST: '0.0.0.0'
  volumes:
    - esdata:/usr/share/elasticsearch/data
  labels:
    io.rancher.container.pull_image: always
    # Soft anti-affinity against this stack's `kafka` containers.
    io.rancher.scheduler.affinity:container_label_soft_ne: 'io.rancher.stack_service.name=$${stack_name}/kafka'
    io.rancher.scheduler.affinity:host_label: role=elk
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment