Skip to content

Instantly share code, notes, and snippets.

@mudassaralichouhan
Last active September 8, 2025 22:39
Show Gist options
  • Select an option

  • Save mudassaralichouhan/a6662732c529b709021eab6d25475643 to your computer and use it in GitHub Desktop.

Select an option

Save mudassaralichouhan/a6662732c529b709021eab6d25475643 to your computer and use it in GitHub Desktop.
search-engine-core
# ---------------------------------------------------------------------------
# Stage 1 ("builder"): toolchain, third-party libraries and the project build.
# ---------------------------------------------------------------------------
FROM ubuntu:22.04 AS builder
# Alternative base image, kept for reference:
#FROM ghcr.io/hatef-ir/mongodb-server:latest as builder

# Keep apt/dpkg from stopping the build to ask questions.
ENV DEBIAN_FRONTEND=noninteractive

# Base tooling: git (used by the source checkouts further down), Python 3 and pip.
RUN apt-get update \
 && apt-get install -y git python3 python3-pip \
 && rm -rf /var/lib/apt/lists/*
#gnupg \
# Add MongoDB repository key properly
#RUN curl -fsSL https://pgp.mongodb.com/server-6.0.asc | \
# gpg -o /usr/share/keyrings/mongodb-server-6.0.gpg --dearmor
# Debug: Find MongoDB driver files
# Cache bust for the CMake update: build with --build-arg CACHEBUST=<new value>
# to force the RUN instructions from here on to be re-executed.
ARG CACHEBUST=1

# Install the compiler toolchain and a newer CMake from Kitware's APT repository.
#
# The Kitware key is downloaded to a file and de-armored in separate steps
# rather than through a `wget | gpg | tee` pipeline: the default /bin/sh has no
# pipefail, so a failed download in a pipeline would go unnoticed. The keyring
# is referenced with `signed-by`, so it is trusted for this repository only
# instead of for every APT source.
RUN apt-get update && apt-get install -y \
        build-essential \
        ca-certificates \
        curl \
        gnupg \
        libssl-dev \
        lsb-release \
        wget \
        zlib1g-dev && \
    wget -nv -O /tmp/kitware-archive-latest.asc \
        https://apt.kitware.com/keys/kitware-archive-latest.asc && \
    gpg --batch --yes -o /usr/share/keyrings/kitware-archive-keyring.gpg \
        --dearmor /tmp/kitware-archive-latest.asc && \
    rm -f /tmp/kitware-archive-latest.asc && \
    echo "deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main" \
        > /etc/apt/sources.list.d/kitware.list && \
    apt-get update && \
    apt-get install -y cmake && \
    # Package indexes are refreshed by every later install step, so drop them here.
    rm -rf /var/lib/apt/lists/* && \
    # Verify installation
    cmake --version
# Install nlohmann_json (required by search_core).
RUN apt-get update \
 && apt-get install -y nlohmann-json3-dev \
 && rm -rf /var/lib/apt/lists/*
# No MongoDB driver probe happens at this point; the drivers are built from
# source later in this stage. (A RUN that only echoed this note was removed:
# it produced an empty layer and nothing else.)
# Build and install uSockets (with SSL support) and uWebSockets.
#
# uWebSockets is pinned to a release tag and carries uSockets as a submodule.
# The static library is built from a copy of that submodule rather than from an
# unpinned clone of uSockets' default branch, so libuSockets.a always matches
# the headers installed below and the build is reproducible.
WORKDIR /deps
RUN git clone --recurse-submodules --branch v20.42.0 https://github.com/uNetworking/uWebSockets.git && \
    # Pristine copy of the pinned uSockets sources, taken before anything is built in the submodule.
    cp -a uWebSockets/uSockets /deps/uSockets

# uSockets static library, built with OpenSSL, plus its headers.
WORKDIR /deps/uSockets
RUN make WITH_OPENSSL=1 -j$(nproc) && \
    mkdir -p /usr/local/include/uSockets && \
    cp src/*.h /usr/local/include/uSockets/ && \
    cp uSockets.a /usr/local/lib/libuSockets.a

WORKDIR /deps/uWebSockets
# Build uWebSockets without examples and without LTO to avoid compilation issues
RUN make -j1 WITH_EXAMPLES=0 LTO=0
# Install uWebSockets headers to the system include path; libusockets.h is also
# exposed at the top level of /usr/local/include through a symlink.
RUN mkdir -p /usr/local/include/uwebsockets && \
    cp -r src/* /usr/local/include/uwebsockets/ && \
    mkdir -p /usr/local/include/usockets && \
    cp -r uSockets/src/* /usr/local/include/usockets/ && \
    ln -sf /usr/local/include/usockets/libusockets.h /usr/local/include/libusockets.h
# GoogleTest from the distribution package, compiled in place under
# /usr/src/gtest (required for Gumbo's `make check`). The same step installs
# the autotools packages.
RUN apt-get update \
 && apt-get install -y software-properties-common \
 && add-apt-repository universe \
 && apt-get update \
 && apt-get install -y --no-install-recommends \
      autoconf \
      automake \
      autotools-dev \
      build-essential \
      ca-certificates \
      cmake \
      git \
      libgtest-dev \
      libtool-bin \
      pkg-config
WORKDIR /usr/src/gtest
RUN cmake -DCMAKE_POLICY_VERSION_MINIMUM=3.5 . \
 && make
# Catch2 v3 (test framework), release v3.4.0, installed under /usr/local.
WORKDIR /deps
RUN git clone --depth 1 --branch v3.4.0 https://github.com/catchorg/Catch2.git
# WORKDIR creates the out-of-source build directory.
WORKDIR /deps/Catch2/build
RUN cmake .. \
      -DCMAKE_BUILD_TYPE=Release \
      -DBUILD_TESTING=OFF \
      -DCMAKE_INSTALL_PREFIX=/usr/local \
 && make -j$(nproc) \
 && make install \
 && ldconfig
# Gumbo HTML parser: shallow clone into /src, then the autotools sequence
# (generate, configure, build, run the test suite, install).
WORKDIR /src
RUN git clone --depth 1 https://github.com/google/gumbo-parser.git
WORKDIR /src/gumbo-parser
RUN ./autogen.sh \
 && ./configure \
 && make \
 && make check \
 && make install \
 && ldconfig
# Install hiredis (required for redis-plus-plus) - built with CMake so that its
# CMake config files are generated. Only the tip of the repository is needed,
# so the clone is shallow like the other source checkouts in this stage.
WORKDIR /deps
RUN git clone --depth 1 https://github.com/redis/hiredis.git
# WORKDIR creates the out-of-source build directory.
WORKDIR /deps/hiredis/build
RUN cmake .. \
        -DCMAKE_BUILD_TYPE=Release \
        -DCMAKE_INSTALL_PREFIX=/usr/local \
        -DENABLE_SSL=ON && \
    make -j$(nproc) && \
    make install && \
    ldconfig

# Install redis-plus-plus (static and shared libraries, C++20, tests disabled).
# CMAKE_PREFIX_PATH points at /usr/local, where hiredis was just installed.
WORKDIR /deps
RUN git clone --depth 1 https://github.com/sewenew/redis-plus-plus.git
WORKDIR /deps/redis-plus-plus/build
RUN cmake .. \
        -DCMAKE_BUILD_TYPE=Release \
        -DCMAKE_INSTALL_PREFIX=/usr/local \
        -DCMAKE_PREFIX_PATH=/usr/local \
        -DCMAKE_CXX_STANDARD=20 \
        -DREDIS_PLUS_PLUS_CXX_STANDARD=20 \
        -DREDIS_PLUS_PLUS_BUILD_TEST=OFF \
        -DREDIS_PLUS_PLUS_BUILD_STATIC=ON \
        -DREDIS_PLUS_PLUS_BUILD_SHARED=ON && \
    make -j$(nproc) && \
    make install && \
    ldconfig
# MongoDB drivers, built from the release tarballs.
# gnupg and curl are already installed by the toolchain step earlier in this
# stage, so no extra apt layer is needed here. The tarballs are unpacked under
# /deps explicitly instead of in whatever directory the previous step left
# behind, and both builds run in parallel like the other dependency builds.
WORKDIR /deps

# Install MongoDB C driver (1.30.3)
RUN wget https://github.com/mongodb/mongo-c-driver/releases/download/1.30.3/mongo-c-driver-1.30.3.tar.gz && \
    tar xzf mongo-c-driver-1.30.3.tar.gz && \
    cmake -S mongo-c-driver-1.30.3 -B mongo-c-driver-1.30.3/cmake-build \
        -DENABLE_AUTOMATIC_INIT_AND_CLEANUP=OFF && \
    cmake --build mongo-c-driver-1.30.3/cmake-build --parallel "$(nproc)" && \
    cmake --build mongo-c-driver-1.30.3/cmake-build --target install && \
    # Remove the tarball and build tree in the same layer that created them.
    rm -rf mongo-c-driver-1.30.3.tar.gz mongo-c-driver-1.30.3

# Install MongoDB C++ driver (r4.0.0); it locates the C driver through CMAKE_PREFIX_PATH.
RUN wget https://github.com/mongodb/mongo-cxx-driver/releases/download/r4.0.0/mongo-cxx-driver-r4.0.0.tar.gz && \
    tar xzf mongo-cxx-driver-r4.0.0.tar.gz && \
    cmake -S mongo-cxx-driver-r4.0.0 -B mongo-cxx-driver-r4.0.0/cmake-build \
        -DCMAKE_BUILD_TYPE=Release \
        -DCMAKE_INSTALL_PREFIX=/usr/local \
        -DCMAKE_PREFIX_PATH=/usr/local \
        -DBSONCXX_POLY_USE_BOOST=0 \
        -DCMAKE_CXX_STANDARD=20 && \
    cmake --build mongo-cxx-driver-r4.0.0/cmake-build --parallel "$(nproc)" && \
    cmake --build mongo-cxx-driver-r4.0.0/cmake-build --target install && \
    rm -rf mongo-cxx-driver-r4.0.0.tar.gz mongo-cxx-driver-r4.0.0
# Remaining development packages from the distribution (libcurl, websocketpp,
# Boost system/thread, OpenSSL, Asio, librdkafka) plus redis-tools.
# Installed in a single layer so the apt package indexes are removed in the
# same layer that downloaded them.
RUN apt-get update && apt-get install -y \
        libasio-dev \
        libboost-system-dev \
        libboost-thread-dev \
        libcurl4-openssl-dev \
        librdkafka-dev \
        libssl-dev \
        libwebsocketpp-dev \
        redis-tools && \
    rm -rf /var/lib/apt/lists/*
# Add ddebs repo
# RUN tee /etc/apt/sources.list.d/ddebs.list <<EOF
# deb http://ddebs.ubuntu.com $(lsb_release -cs) main restricted universe multiverse
# deb http://ddebs.ubuntu.com $(lsb_release -cs)-updates main restricted universe multiverse
# deb http://ddebs.ubuntu.com $(lsb_release -cs)-security main restricted universe multiverse
# # Optional -proposed
# # deb http://ddebs.ubuntu.com $(lsb_release -cs)-proposed main restricted universe multiverse
# EOF
# RUN apt update && apt install -y libcurl4-openssl-dev-dbgsym
# Manual cache-buster for the project build. Declared as a build argument
# (previously a RUN that only assigned a shell variable and created an empty
# layer): change the default or pass --build-arg ANTICASH=<n> to force the RUN
# instructions below to execute again.
ARG ANTICASH=6

# Set up project build
WORKDIR /deps
COPY src/ /deps/src/
COPY tests/ /deps/tests/
COPY CMakeLists.txt /deps/
COPY include/ /deps/include/

# uWebSockets and uSockets are now installed system-wide, no need to copy

# Build using CMake: Debug configuration, strict C++20 without compiler
# extensions, tests disabled. CMAKE_PREFIX_PATH names the package directories
# of the MongoDB C++ driver installed earlier in this stage.
RUN rm -rf build && \
    mkdir build && \
    cd build && \
    cmake .. \
        -DCMAKE_BUILD_TYPE=Debug \
        -DCMAKE_CXX_STANDARD=20 \
        -DCMAKE_CXX_STANDARD_REQUIRED=ON \
        -DCMAKE_CXX_EXTENSIONS=OFF \
        -DPkgConfig_DIR=/usr/share/pkgconfig \
        -DCMAKE_PREFIX_PATH="/usr/local/lib/cmake/mongocxx-4.0.0;/usr/local/lib/cmake/bsoncxx-4.0.0" \
        -DBUILD_TESTS=OFF && \
    make -j$(nproc)

# Stage the runtime assets under /app; the runtime stage copies them from here.
COPY public/ /app/public/
COPY locales/ /app/locales/
COPY templates/ /app/templates/
COPY config/ /app/config/
# Run tests after build - crawler tests first, then all tests
# RUN cd build && \
# echo "Running crawler tests first..." && \
# (ctest --test-dir . -R crawler --verbose || true) && \
# echo "Running all tests..." && \
# (ctest --test-dir . --verbose || true) && \
# echo "Test execution completed"
# Copy the startup script
# COPY start.sh /app/start.sh
# RUN chmod +x /app/start.sh
# ---------------------------------------------------------------------------
# Runtime stage: a fresh Ubuntu base with only what the server needs to run.
# ---------------------------------------------------------------------------
FROM ubuntu:22.04 AS runner

# Runtime shared libraries only. The librdkafka development package is not
# needed to run the server; librdkafka++1 is listed explicitly because it was
# previously installed only as a dependency of that -dev package.
RUN apt-get update && apt-get install -y \
        ca-certificates \
        libcurl4 \
        librdkafka++1 \
        librdkafka1 && \
    rm -rf /var/lib/apt/lists/*

# Libraries copied in from the builder stage are placed in /usr/local/lib.
# The value is a single directory on purpose: appending ":${LD_LIBRARY_PATH}"
# while that variable is unset leaves an empty entry, which the dynamic loader
# interprets as the current working directory.
ENV LD_LIBRARY_PATH=/usr/local/lib

# Set default port
ENV PORT=3000
ENV MINIFY_JS=true

# Set default Redis configuration for search
ENV SEARCH_REDIS_URI=tcp://127.0.0.1:6379
ENV SEARCH_REDIS_POOL_SIZE=4
ENV SEARCH_INDEX_NAME=search_index

# Set default template configuration
ENV TEMPLATES_PATH=/app/config/templates

WORKDIR /app
# Copy the built binary from the builder stage. The executable bit is set by
# COPY itself, so no separate chmod layer is needed.
COPY --chmod=0755 --from=builder /deps/build/server ./server

# Shared libraries that were built from source in the builder stage. Only the
# .so files are copied: headers and static archives are build-time artifacts
# and are not needed to run the server.
# Gumbo
COPY --from=builder /usr/local/lib/libgumbo.so* /usr/local/lib/
# hiredis
COPY --from=builder /usr/local/lib/libhiredis.so* /usr/local/lib/
# redis-plus-plus
COPY --from=builder /usr/local/lib/libredis++.so* /usr/local/lib/
# MongoDB C and C++ drivers
COPY --from=builder /usr/local/lib/libmongocxx.so* /usr/local/lib/
COPY --from=builder /usr/local/lib/libbsoncxx.so* /usr/local/lib/
COPY --from=builder /usr/local/lib/libmongoc-1.0.so* /usr/local/lib/
COPY --from=builder /usr/local/lib/libbson-1.0.so* /usr/local/lib/

# Update library cache
RUN ldconfig
# Copy the static assets, locales, templates and configuration that the
# builder stage staged under /app.
COPY --from=builder /app/public ./public
COPY --from=builder /app/locales ./locales
COPY --from=builder /app/templates ./templates
COPY --from=builder /app/config ./config

# Copy the startup script from the build context and make it executable in the
# same layer.
COPY --chmod=0755 scripts/start.sh /app/start.sh

# NOTE(review): no USER instruction is set, so the container runs as root.
# Confirm whether start.sh or the mounted template directory needs that before
# switching to an unprivileged user.

# Expose the port
EXPOSE ${PORT}

# Set the entrypoint to the startup script
ENTRYPOINT ["/app/start.sh"]

# Simple mongosh test to verify MongoDB is available
# RUN echo "db.runCommand({ ping: 1 })" > /tmp/mongosh_test.js && \
# mongosh --file /tmp/mongosh_test.js || (echo "⚠️ mongosh test failed, but continuing build"; exit 0)
# Start MongoDB
# CMD ["mongodsh", "--fork", "--logpath", "/var/log/mongodb.log"]
# ENTRYPOINT ["/bin/bash", "-c", "echo 'Container started for test'; exec sleep infinity"]
# Docker Compose stack: the search engine ("search-engine", built from the
# local Dockerfile) together with the JS minifier service, Redis, MongoDB and
# a browserless Chrome instance. All services share the "search-network"
# bridge network.
#
# NOTE(review): the MongoDB credentials below are hard-coded in this file;
# confirm they are only ever used for local development.
services:
  # zookeeper:
  #   image: bitnami/zookeeper:3.9
  #   container_name: zookeeper
  #   restart: unless-stopped
  #   environment:
  #     - ALLOW_ANONYMOUS_LOGIN=yes
  #   ports:
  #     - "2181:2181"
  #   networks:
  #     - search-network

  # kafka:
  #   image: bitnami/kafka:3.7
  #   container_name: kafka
  #   restart: unless-stopped
  #   depends_on:
  #     - zookeeper
  #   environment:
  #     - KAFKA_CFG_ZOOKEEPER_CONNECT=zookeeper:2181
  #     - KAFKA_CFG_LISTENERS=PLAINTEXT://:9092
  #     - KAFKA_CFG_ADVERTISED_LISTENERS=PLAINTEXT://kafka:9092
  #     - KAFKA_CFG_AUTO_CREATE_TOPICS_ENABLE=true
  #     - ALLOW_PLAINTEXT_LISTENER=yes
  #   ports:
  #     - "9092:9092"
  #   healthcheck:
  #     test: ["CMD", "bash", "-c", "kafka-topics.sh --bootstrap-server localhost:9092 --list || exit 1"]
  #     interval: 10s
  #     timeout: 5s
  #     retries: 10
  #   networks:
  #     - search-network

  # Main application, built from the Dockerfile in this directory.
  search-engine:
    build:
      context: .
      dockerfile: Dockerfile
      platforms:
        - linux/arm64
    container_name: core
    restart: "no" # Disable auto-restart for debugging
    ports:
      - "3000:3000"
    environment:
      - MONGODB_URI=mongodb://admin:password123@mongodb:27017
      - SEARCH_REDIS_URI=tcp://redis:6379
      - SEARCH_REDIS_POOL_SIZE=4
      - SEARCH_INDEX_NAME=search_index
      - MINIFY_JS=true
      - MINIFY_JS_LEVEL=none # Use microservice instead
      - JS_MINIFIER_SERVICE_URL=http://js-minifier:3002
      # Cache configuration
      - JS_CACHE_ENABLED=true
      - JS_CACHE_TYPE=redis
      - JS_CACHE_TTL=3600
      - JS_CACHE_REDIS_DB=1
      # Template configuration
      - TEMPLATES_PATH=/app/config/templates
      # Debug settings
      - DEBUG=1
      - LOG_LEVEL=DEBUG
      # - KAFKA_BOOTSTRAP_SERVERS=kafka:9092
      # - KAFKA_FRONTIER_TOPIC=crawl.frontier
    volumes:
      - ./config/templates:/app/config/templates:rw
    depends_on:
      - redis
      - mongodb
      - js-minifier
      # - kafka
    networks:
      - search-network
    dns:
      - 8.8.8.8
      - 1.1.1.1
      - 8.8.4.4

  # JavaScript minifier microservice, built from ./js-minifier-service.
  js-minifier:
    build:
      context: ./js-minifier-service
      dockerfile: Dockerfile
    container_name: js-minifier
    restart: unless-stopped
    ports:
      - "3002:3002"
    environment:
      - NODE_ENV=production
      - PORT=3002
    networks:
      - search-network
    healthcheck:
      test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:3002/health || exit 1"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 10s

  # Redis with append-only persistence and a 256 MB LRU-evicted memory cap
  # (set through the command-line flags below).
  redis:
    image: redis:7.0-alpine
    platform: linux/amd64
    container_name: redis
    restart: unless-stopped
    ports:
      - "6379:6379"
    volumes:
      - redis_data:/data
    command: ["redis-server", "--appendonly", "yes", "--maxmemory", "256mb", "--maxmemory-policy", "allkeys-lru"]
    environment:
      - REDIS_MAXMEMORY=268435456 # 256MB in bytes
      - REDIS_MAXMEMORY_POLICY=allkeys-lru
    networks:
      - search-network

  # MongoDB with a root user created from the MONGO_INITDB_* variables.
  mongodb:
    image: mongo:4.4
    platform: linux/amd64
    container_name: mongodb_test
    restart: unless-stopped
    ports:
      - "27017:27017"
    volumes:
      - mongodb_data:/data/db
    environment:
      - MONGO_INITDB_ROOT_USERNAME=admin
      - MONGO_INITDB_ROOT_PASSWORD=password123
    networks:
      - search-network

  # Headless Chrome (browserless), published on host port 3001.
  browserless:
    image: browserless/chrome:1.61-chrome-stable
    platform: linux/amd64
    container_name: browserless
    restart: unless-stopped
    ports:
      - "3001:3000"
    environment:
      - "MAX_CONCURRENT_SESSIONS=10"
      - "PREBOOT_CHROME=true"
      - "CONNECTION_TIMEOUT=15000"
      - "CHROME_REFRESH_TIME=60000"
      - "QUEUE_LIMIT=100"
      - "MAX_CPU_PERCENT=90"
      - "MAX_MEMORY_PERCENT=90"
      - "KEEP_ALIVE=true"
      - "ENABLE_DEBUGGER=false"
      - "ENABLE_CORS=true"
      - "WORKSPACE_DIR=/workspace"
      - "FUNCTION_ENABLE_INCOGNITO=false"
      - "FUNCTION_KEEP_ALIVE=true"
    networks:
      - search-network
    dns:
      - 8.8.8.8
      - 1.1.1.1
      - 8.8.4.4
    deploy:
      resources:
        limits:
          memory: 2G
        reservations:
          memory: 1G

  # mongodb-test:
  #   build:
  #     context: .
  #     dockerfile: Dockerfile.test.mongo
  #   depends_on:
  #     - mongodb
  #   environment:
  #     - MONGODB_URI=mongodb://mongodb:27017

networks:
  search-network:
    driver: bridge

volumes:
  mongodb_data:
  redis_data:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment