This extends the earlier Apache Spark local-mode setup that reads from an AWS S3 bucket with Docker, replacing local mode with a standalone cluster.

Step 1: Create the “docker-compose.yml” below. It defines MinIO to emulate AWS S3, a MySQL database, and a Spark master with a Spark worker that together form the cluster. “mysql” is the hostname of the MySQL container.
version: "2"
services:
  s3:
    container_name: s3
    image: minio/minio:RELEASE.2018-05-04T23-13-12Z
    environment:
      MINIO_ACCESS_KEY: user
      MINIO_SECRET_KEY: password
    ports:
      - 9000:9000
    volumes:
      - ./docker/s3/data:/data
      - ./docker/s3/config:/root/.minio
    networks:
      - infra
    command: server /data
  mysql:
    container_name: mysql
    image: mysql:5.7.21
    ports:
      - 3306:3306
    volumes:
      - ./docker/mysql/data:/var/lib/mysql
    environment:
      MYSQL_ROOT_PASSWORD: password
    networks:
      - infra
  spark-master:
    build:
      dockerfile: spark.dockerfile
      context: ./docker/spark
    command: bin/spark-class org.apache.spark.deploy.master.Master -h spark-master
    hostname: spark-master
    environment:
      MASTER: spark://spark-master:7077
      SPARK_CONF_DIR: /conf
      SPARK_PUBLIC_DNS: 127.0.0.1
    expose:
      - 7001
      - 7002
      - 7003
      - 7004
      - 7005
      - 7006
      - 7077
      - 6066
      - 5005
    ports:
      - 5005:5005
      - 6066:6066
      - 7077:7077
      - 8080:8080
    volumes:
      - ./docker/spark/spark-master:/conf
      - ./data:/tmp/data
    networks:
      - infra
  spark-worker-1:
    build:
      dockerfile: spark.dockerfile
      context: ./docker/spark
    command: bin/spark-class org.apache.spark.deploy.worker.Worker spark://spark-master:7077
    hostname: spark-worker-1
    environment:
      SPARK_CONF_DIR: /conf
      SPARK_PUBLIC_DNS: 127.0.0.1
      SPARK_WORKER_CORES: 2
      SPARK_WORKER_MEMORY: 2g
      SPARK_WORKER_PORT: 8881
      SPARK_WORKER_WEBUI_PORT: 8081
    links:
      - spark-master
    expose:
      - 7012
      - 7013
      - 7014
      - 7015
      - 7016
      - 8881
    ports:
      - 8081:8081
    volumes:
      - ./conf/spark-worker-1:/conf
      - ./data:/tmp/data
    networks:
      - infra
networks:
  infra:
    external:
      name: docker_test_infra
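Note that the “infra” network is declared as external, so it must exist before the stack starts; it can be created once with “docker network create docker_test_infra”, after which “docker-compose up -d” brings up all four containers. As a quick smoke test, a minimal PySpark sketch like the one below can read from the emulated S3 through the s3a connector. This is not part of the original setup: the bucket “my-bucket” and the object “data.csv” are hypothetical, a hadoop-aws package matching the Hadoop version is assumed to be on the classpath, and the job is assumed to be submitted from inside the compose network (e.g. via “docker exec” into the spark-master container) so that the service hostnames resolve.

# A minimal sketch, assuming it runs inside the compose network so the
# hostnames "spark-master" and "s3" resolve. Credentials and ports match
# the docker-compose.yml above; the bucket and object are hypothetical.
from pyspark.sql import SparkSession

spark = (
    SparkSession.builder
    .appName("s3-read-test")
    .master("spark://spark-master:7077")
    .config("spark.hadoop.fs.s3a.endpoint", "http://s3:9000")  # the MinIO service
    .config("spark.hadoop.fs.s3a.access.key", "user")          # MINIO_ACCESS_KEY
    .config("spark.hadoop.fs.s3a.secret.key", "password")      # MINIO_SECRET_KEY
    .config("spark.hadoop.fs.s3a.path.style.access", "true")   # MinIO needs path-style URLs
    .getOrCreate()
)

# Read a CSV that was uploaded to MinIO (e.g. through its web UI on port 9000).
df = spark.read.csv("s3a://my-bucket/data.csv", header=True)
df.show()

From the host, the published ports make the same services reachable as localhost:7077 (master) and localhost:9000 (MinIO), but the executors inside the worker container still need the in-network “s3” hostname as the endpoint to fetch the data.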
Step…