-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathstart.sh
More file actions
executable file
·95 lines (80 loc) · 3.28 KB
/
start.sh
File metadata and controls
executable file
·95 lines (80 loc) · 3.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/usr/bin/env bash
#
# start.sh — Bring up the Speech Analytics lambda-architecture stack:
#   1. Docker infrastructure (docker-compose)
#   2. Cassandra schema (schema.cql)
#   3. Maven build: Kafka producer
#   4. Maven build: Spark processor (JAR copied into spark-master)
#   5. Kafka producer started in the background
#   6. Spark structured-streaming job submitted (speed layer)
#   7. Initial batch aggregation job (batch layer)
#
# Run from the repository root: ./start.sh
# Stop everything with:        ./scripts/stop.sh
#
# Fail fast: abort on any unchecked command failure, unset variable,
# or failed pipeline stage. Previously every failure was silently
# ignored and later steps ran against missing artifacts.
set -euo pipefail

# ANSI colors for progress output.
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
# shellcheck disable=SC2034 — RED is currently unused; kept for error paths.
readonly RED='\033[0;31m'
readonly NC='\033[0m' # No Color

# Step 1: Start Docker infrastructure.
echo -e "${YELLOW}Step 1: Starting Docker infrastructure...${NC}"
docker-compose up -d
echo "Waiting for services to be healthy..."
sleep 30  # fixed grace period; TODO: poll container health checks instead

# Step 2: Create Cassandra schema.
echo -e "${YELLOW}Step 2: Creating Cassandra schema...${NC}"
docker exec -i cassandra cqlsh -f ./schema.cql
echo -e "${GREEN}✓ Cassandra schema created${NC}"

# Step 3: Build Kafka Producer.
echo -e "${YELLOW}Step 3: Building Kafka Producer...${NC}"
cd kafka-producer || exit
mvn clean package -DskipTests
echo -e "${GREEN}✓ Kafka Producer built${NC}"
cd ..

# Step 4: Build Spark Processor.
echo -e "${YELLOW}Step 4: Building Spark Processor...${NC}"
cd spark-processor || exit
mvn clean package -DskipTests
echo -e "${GREEN}✓ Spark Processor built${NC}"
cd ..

# Copy the processor JAR into the spark-master container so spark-submit
# can reference it by a container-local path (/app/...).
docker cp ./spark-processor/target/spark-processor-1.0.0.jar spark-master:/app/

# Step 5: Start Kafka Producer in the background, feeding the corpus directory.
echo -e "${YELLOW}Step 5: Starting Kafka Producer...${NC}"
cd kafka-producer || exit
java -jar target/kafka-producer-1.0.0.jar ../data/cmu_us_clb_arctic &
PRODUCER_PID=$!
echo -e "${GREEN}✓ Kafka Producer started (PID: $PRODUCER_PID)${NC}"
cd ..

# Give the producer a head start so the streaming job has data to consume.
sleep 10

# Step 6: Submit the Spark structured-streaming job (speed layer).
# Runs in the background; the container keeps the job alive.
echo -e "${YELLOW}Step 6: Submitting Spark Streaming job...${NC}"
docker exec spark-master /spark/bin/spark-submit \
  --class com.speech.streaming.SpeechStreamingProcessor \
  --master spark://spark-master:7077 \
  --packages com.datastax.spark:spark-cassandra-connector_2.12:3.4.1,org.apache.spark:spark-sql-kafka-0-10_2.12:3.3.0,org.apache.spark:spark-avro_2.12:3.3.0,io.confluent:kafka-avro-serializer:7.5.0 \
  --repositories https://packages.confluent.io/maven/ \
  --conf spark.cassandra.connection.host=cassandra \
  /app/spark-processor-1.0.0.jar &
echo -e "${GREEN}✓ Spark Streaming job submitted${NC}"

# Let streaming output accumulate before the first batch pass.
sleep 45

# Step 7: Truncate counter tables, then run the batch aggregation job.
# '-i' (not '-it'): '-t' allocates a TTY and fails when the script runs
# without one (CI, cron). stderr suppression and '|| true' are intentional:
# truncation is best-effort — on a first run the tables may not exist yet.
echo -e "${YELLOW}Step 7: Running initial Batch processing...${NC}"
docker exec -i cassandra cqlsh -e "TRUNCATE speech_analytics.sustainability_keywords; TRUNCATE speech_analytics.phoneme_stats; TRUNCATE speech_analytics.speaker_analytics;" 2>/dev/null || true
docker exec spark-master /spark/bin/spark-submit \
  --class com.speech.batch.BatchAggregationJob \
  --master "local[*]" \
  --packages com.datastax.spark:spark-cassandra-connector_2.12:3.4.1 \
  --conf spark.cassandra.connection.host=cassandra \
  /app/spark-processor-1.0.0.jar
echo -e "${GREEN}✓ Batch processing completed${NC}"

echo ""
echo "=========================================="
echo -e "${GREEN}All services started successfully!${NC}"
echo "=========================================="
echo ""
echo "Access Points:"
# NOTE(review): 9092 is conventionally the Kafka broker port, not a web UI —
# confirm the intended Kafka UI port against docker-compose.yml.
echo " - Kafka UI: http://localhost:9092"
echo " - Schema Registry: http://localhost:8081"
echo " - Spark Master UI: http://localhost:8080"
echo " - HDFS NameNode UI: http://localhost:9870"
echo " - Dashboard: http://localhost:8090"
echo ""
echo "To stop all services:"
echo " ./scripts/stop.sh"
echo ""