# docker-compose.yml
services:
  # ZooKeeper for Kafka.
  zookeeper:
    image: confluentinc/cp-zookeeper:7.5.0
    container_name: zookeeper
    hostname: zookeeper
    networks:
      - streaming-network
    ports:
      # Client port, published to the host under the same number.
      - '2181:2181'
    environment:
      ZOOKEEPER_CLIENT_PORT: '2181'
      ZOOKEEPER_TICK_TIME: '2000'
    volumes:
      # Data and log directories are bind-mounted under ./data on the host.
      - ./data/zookeeper:/var/lib/zookeeper/data
      - ./data/zookeeper-logs:/var/lib/zookeeper/log
    # The kafka service waits on this check (depends_on: service_healthy).
    # It pipes `ruok` to port 2181 through nc; the result is nc's exit status.
    healthcheck:
      test:
        - CMD
        - /bin/sh
        - -c
        - 'echo ruok | nc localhost 2181'
      interval: 10s
      timeout: 5s
      retries: 5
# Kafka broker (single node, started once ZooKeeper reports healthy).
  kafka:
    image: confluentinc/cp-kafka:7.5.0
    container_name: kafka
    hostname: kafka
    networks:
      - streaming-network
    depends_on:
      zookeeper:
        condition: service_healthy
    ports:
      # 9092 matches the PLAINTEXT_HOST listener, 9101 matches KAFKA_JMX_PORT.
      - '9092:9092'
      - '9101:9101'
    environment:
      KAFKA_BROKER_ID: '1'
      KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181'
      # Two plaintext listeners: PLAINTEXT is advertised as kafka:29092 (the
      # address the other services in this file use), PLAINTEXT_HOST is
      # advertised as localhost:9092 (the port published to the host).
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: 'PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT'
      KAFKA_ADVERTISED_LISTENERS: 'PLAINTEXT://kafka:29092,PLAINTEXT_HOST://localhost:9092'
      # Replication factors are 1 because there is only one broker.
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: '1'
      KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: '0'
      KAFKA_CONFLUENT_METRICS_REPORTER_BOOTSTRAP_SERVERS: 'kafka:29092'
      KAFKA_CONFLUENT_METRICS_REPORTER_TOPIC_REPLICAS: '1'
      KAFKA_CONFLUENT_METRICS_ENABLE: 'false'
      KAFKA_JMX_PORT: '9101'
      KAFKA_JMX_HOSTNAME: localhost
      KAFKA_AUTO_CREATE_TOPICS_ENABLE: 'true'
      CLUSTER_ID: 'streaming-cluster'
    volumes:
      - ./data/kafka:/var/lib/kafka/data
      # Mounted into the container; the entrypoint below does not call it.
      - ./kafka-init.sh:/usr/local/bin/kafka-init.sh
    # Custom entrypoint: clears and recreates the data directory, then hands
    # over to the image's own run script. Broker data in ./data/kafka is
    # therefore discarded on every container start.
    entrypoint:
      - /bin/bash
      - -c
      - |
        rm -rf /var/lib/kafka/data
        mkdir -p /var/lib/kafka/data
        exec /etc/confluent/docker/run
    # Dependent services (schema-registry) wait for this check to pass.
    healthcheck:
      test:
        - CMD
        - kafka-broker-api-versions
        - --bootstrap-server
        - 'localhost:9092'
      interval: 10s
      timeout: 5s
      retries: 5
# Schema Registry (started once the Kafka broker reports healthy).
  schema-registry:
    image: confluentinc/cp-schema-registry:7.5.0
    container_name: schema-registry
    hostname: schema-registry
    networks:
      - streaming-network
    depends_on:
      kafka:
        condition: service_healthy
    ports:
      - '8081:8081'
    environment:
      SCHEMA_REGISTRY_HOST_NAME: schema-registry
      # Uses the broker's internal listener on the compose network.
      SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS: 'kafka:29092'
      SCHEMA_REGISTRY_LISTENERS: 'http://0.0.0.0:8081'
    # Healthy once GET /subjects succeeds; kafka-connect, file-generator and
    # both producers wait on this condition.
    healthcheck:
      test:
        - CMD
        - curl
        - -f
        - 'http://localhost:8081/subjects'
      interval: 10s
      timeout: 5s
      retries: 5
# Kafka Connect worker (started once Schema Registry reports healthy).
  kafka-connect:
    image: confluentinc/cp-kafka-connect:7.5.0
    container_name: kafka-connect
    hostname: kafka-connect
    networks:
      - streaming-network
    depends_on:
      schema-registry:
        condition: service_healthy
    ports:
      - '8083:8083'
    environment:
      CONNECT_BOOTSTRAP_SERVERS: 'kafka:29092'
      CONNECT_REST_ADVERTISED_HOST_NAME: kafka-connect
      CONNECT_GROUP_ID: compose-connect-group
      # Internal topics for configs, offsets and status; replication factor 1
      # because the stack runs a single broker.
      CONNECT_CONFIG_STORAGE_TOPIC: docker-connect-configs
      CONNECT_CONFIG_STORAGE_REPLICATION_FACTOR: '1'
      CONNECT_OFFSET_STORAGE_TOPIC: docker-connect-offsets
      CONNECT_OFFSET_STORAGE_REPLICATION_FACTOR: '1'
      CONNECT_OFFSET_FLUSH_INTERVAL_MS: '10000'
      CONNECT_STATUS_STORAGE_TOPIC: docker-connect-status
      CONNECT_STATUS_STORAGE_REPLICATION_FACTOR: '1'
      # String keys, Avro values resolved through the schema-registry service.
      CONNECT_KEY_CONVERTER: org.apache.kafka.connect.storage.StringConverter
      CONNECT_VALUE_CONVERTER: io.confluent.connect.avro.AvroConverter
      CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: 'http://schema-registry:8081'
      CONNECT_PLUGIN_PATH: '/usr/share/java,/usr/share/confluent-hub-components'
      CONNECT_LOG4J_LOGGERS: org.apache.zookeeper=ERROR,org.I0Itec.zkclient=ERROR,org.reflections=ERROR
    volumes:
      # Same host directories that csv-to-kafka-producer mounts.
      - ./data/input:/data/input
      - ./data/processed:/data/processed
# Apache Flink JobManager.
  flink-jobmanager:
    image: flink:1.18-scala_2.12
    container_name: flink-jobmanager
    hostname: flink-jobmanager
    command: jobmanager
    networks:
      - streaming-network
    ports:
      # Container port 8081 is published on host port 8088 (host port 8081 is
      # already taken by schema-registry).
      - '8088:8081'
    volumes:
      - ./flink-jobs:/opt/flink/jobs
    # FLINK_PROPERTIES carries Flink configuration lines as one multi-line
    # environment value.
    # NOTE(review): the checkpoint and savepoint directories are file:///tmp
    # paths and no volume is mounted for them in this file - confirm that
    # container-local storage is intended.
    environment:
      - |
        FLINK_PROPERTIES=
        jobmanager.rpc.address: flink-jobmanager
        state.backend: filesystem
        state.checkpoints.dir: file:///tmp/flink-checkpoints
        state.savepoints.dir: file:///tmp/flink-savepoints
# Apache Flink TaskManager (two replicas, four task slots each).
  flink-taskmanager:
    image: flink:1.18-scala_2.12
    # No container_name is set here; the service runs as two replicas, and
    # both receive the same hostname value.
    hostname: flink-taskmanager
    command: taskmanager
    deploy:
      replicas: 2
    networks:
      - streaming-network
    depends_on:
      - flink-jobmanager
    # Points each TaskManager at the flink-jobmanager service and repeats the
    # state backend / checkpoint directory settings used by the JobManager.
    environment:
      - |
        FLINK_PROPERTIES=
        jobmanager.rpc.address: flink-jobmanager
        taskmanager.numberOfTaskSlots: 4
        state.backend: filesystem
        state.checkpoints.dir: file:///tmp/flink-checkpoints
# ClickHouse server.
  clickhouse:
    image: clickhouse/clickhouse-server:23.8
    container_name: clickhouse
    hostname: clickhouse
    networks:
      - streaming-network
    ports:
      - '8123:8123'
      - '9000:9000'
    environment:
      CLICKHOUSE_DB: streaming_demo
      # Credentials can be overridden from the shell or an .env file; when the
      # variables are unset the original demo values (admin / admin) apply.
      CLICKHOUSE_USER: '${CLICKHOUSE_USER:-admin}'
      CLICKHOUSE_PASSWORD: '${CLICKHOUSE_PASSWORD:-admin}'
      CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: '1'
    volumes:
      # Scripts in ./clickhouse/init are exposed at the image's initdb path.
      - ./clickhouse/init:/docker-entrypoint-initdb.d
      # Host config.xml is mounted as a config.d override named
      # cors-headers.xml (presumably CORS settings - confirm against the file).
      - ./clickhouse/config.xml:/etc/clickhouse-server/config.d/cors-headers.xml
      - ./data/clickhouse:/var/lib/clickhouse
    # Raises the open-file limit for the server process.
    ulimits:
      nofile:
        soft: 262144
        hard: 262144
# Apache Superset (BI & OLAP).
  superset:
    image: apache/superset:3.0.0
    container_name: superset
    hostname: superset
    networks:
      - streaming-network
    depends_on:
      - clickhouse
    ports:
      # Container port 8088 is published on host port 8089 (host port 8088 is
      # used by flink-jobmanager).
      - '8089:8088'
    environment:
      # Override SUPERSET_SECRET_KEY from the shell or an .env file outside of
      # demos; when unset, the original demo key is used so existing setups
      # keep working.
      SUPERSET_SECRET_KEY: '${SUPERSET_SECRET_KEY:-streaming_first_demo_secret_key_change_in_prod}'
      SUPERSET_LOAD_EXAMPLES: 'no'
    volumes:
      - ./superset/superset_config.py:/app/pythonpath/superset_config.py
      # Mounted into the container; the command below does not invoke it.
      - ./superset/init-superset.sh:/app/docker/init-superset.sh
      - ./data/superset:/app/superset_home
    # Installs the ClickHouse driver on every start, then launches the app
    # through the image's bootstrap script.
    command:
      - bash
      - -c
      - 'pip install clickhouse-connect && /app/docker/docker-bootstrap.sh app-gunicorn'
# File Generator (data simulation - generates CSV/JSON).
  file-generator:
    container_name: file-generator
    build:
      context: ./file-generator
      dockerfile: Dockerfile
    networks:
      - streaming-network
    depends_on:
      schema-registry:
        condition: service_healthy
    environment:
      OUTPUT_DIR: /output
      GENERATION_INTERVAL: '30'
    volumes:
      # /output is the host's ./data/input, the same directory that
      # csv-to-kafka-producer mounts as /data/input.
      - ./data/input:/output
# CSV to Kafka Avro Producer (reads CSV files and sends them to Kafka as Avro).
  csv-to-kafka-producer:
    container_name: csv-to-kafka-producer
    build:
      context: ./csv-to-kafka-producer
      dockerfile: Dockerfile
    networks:
      - streaming-network
    # Waits for a healthy Schema Registry, and for file-generator to have
    # started (not necessarily to have produced files yet).
    depends_on:
      schema-registry:
        condition: service_healthy
      file-generator:
        condition: service_started
    environment:
      KAFKA_BOOTSTRAP_SERVERS: 'kafka:29092'
      SCHEMA_REGISTRY_URL: 'http://schema-registry:8081'
      INPUT_DIR: /data/input
      PROCESSED_DIR: /data/processed
      POLL_INTERVAL: '5'
    volumes:
      - ./data/input:/data/input
      - ./data/processed:/data/processed
      # Schemas are mounted read-only.
      - ./schemas:/schemas:ro
# Streaming Real-time Avro Producer (generates and sends Avro in real time).
  streaming-producer:
    container_name: streaming-producer
    build:
      context: ./streaming-producer
      dockerfile: Dockerfile
    networks:
      - streaming-network
    depends_on:
      schema-registry:
        condition: service_healthy
    environment:
      KAFKA_BOOTSTRAP_SERVERS: 'kafka:29092'
      SCHEMA_REGISTRY_URL: 'http://schema-registry:8081'
      # Quoted so the container receives the literal text "30.0". An unquoted
      # 30.0 is parsed as a YAML float, and the string it is turned back into
      # depends on the Compose implementation.
      GENERATION_INTERVAL: '30.0'
      BATCH_SIZE: '5'
    volumes:
      # Schemas are mounted read-only.
      - ./schemas:/schemas:ro
# Kafka UI (for monitoring).
  kafka-ui:
    # Pinned to a release tag, like every other image in this file, so that
    # rebuilding the stack later pulls the same version instead of whatever
    # `latest` points to at that time.
    image: provectuslabs/kafka-ui:v0.7.2
    container_name: kafka-ui
    networks:
      - streaming-network
    # Start-order only: these entries do not wait for the services to become
    # healthy.
    depends_on:
      - kafka
      - schema-registry
      - kafka-connect
    ports:
      - '8080:8080'
    environment:
      # One cluster entry ("local") wired to the broker's internal listener,
      # the Schema Registry and the Kafka Connect REST endpoint.
      KAFKA_CLUSTERS_0_NAME: local
      KAFKA_CLUSTERS_0_BOOTSTRAPSERVERS: 'kafka:29092'
      KAFKA_CLUSTERS_0_SCHEMAREGISTRY: 'http://schema-registry:8081'
      KAFKA_CLUSTERS_0_KAFKACONNECT_0_NAME: connect
      KAFKA_CLUSTERS_0_KAFKACONNECT_0_ADDRESS: 'http://kafka-connect:8083'
# Single bridge network that every service in this file is attached to.
networks:
  streaming-network:
    driver: bridge
# Named volumes with default settings.
# NOTE(review): no service in this file references these; kafka, clickhouse
# and superset all use ./data bind mounts instead - confirm they are needed.
volumes:
  kafka-data: {}
  clickhouse-data: {}
  superset-data: {}