Skip to content

Instantly share code, notes, and snippets.

@sycomix
Last active July 14, 2025 19:25
Show Gist options
  • Select an option

  • Save sycomix/51a1ca264abe77673636b7201401dbd6 to your computer and use it in GitHub Desktop.

Select an option

Save sycomix/51a1ca264abe77673636b7201401dbd6 to your computer and use it in GitHub Desktop.
tika build and install server from source
#!/bin/bash
#==============================================================================
# Download, Compile, and Install Apache Tika Server (Definitive Edition)
#
# This script fully automates the setup of a full-featured Apache Tika server.
# It uses a robust two-step process to guarantee ALL modules and dependencies
# are correctly packaged before installing the application as a system service.
#
# Prerequisites:
# - This script must be run with root privileges (e.g., using 'sudo').
# - 'git' and 'maven' (with a JDK) must be installed on the system.
# - The target host must use systemd; the server is installed as a systemd unit.
#
#==============================================================================
# --- Configuration ---
# Stop at the first command that exits with a non-zero status.
set -o errexit

# Working directories, relative to the directory the script is started from:
# the git checkout and the staging tree that is later moved into place.
SOURCE_DIR='tika'
BUILD_DIR='tika_server_build'

# Final location of the installed application.
INSTALL_DIR='/opt/tika-server-full'

# systemd unit name, the account the unit runs as, and the TCP port that is
# written to the server config and opened in the firewall.
SERVICE_NAME='tika-server.service'
SERVICE_USER='tika'
SERVER_PORT='9998'
# --- Script Body ---
# 1. Privilege and Dependency Checks
# Fail fast, before the long clone/build, when the script is not running as
# root or when a tool that a later step invokes is missing. Error messages are
# written to stderr so they are not lost if stdout is redirected.
echo "INFO: Checking prerequisites..."
if [ "$(id -u)" -ne 0 ]; then
  echo "ERROR: This script must be run as root or with sudo." >&2
  exit 1
fi
if ! command -v git > /dev/null 2>&1; then
  echo "ERROR: 'git' is not installed. Please install it first (e.g., 'sudo apt install git')." >&2
  exit 1
fi
if ! command -v mvn > /dev/null 2>&1; then
  echo "ERROR: 'maven' is not installed. Please install it first (e.g., 'sudo apt install maven')." >&2
  exit 1
fi
# The final steps call systemctl unconditionally; detect its absence now
# instead of after several minutes of compilation.
if ! command -v systemctl > /dev/null 2>&1; then
  echo "ERROR: 'systemctl' was not found. This script installs a systemd service and requires systemd." >&2
  exit 1
fi
echo "INFO: Prerequisites met."
# 2. Download and Compile Apache Tika
# TIKA_GIT_REF selects the branch or tag to build. It defaults to 'main' and
# may be overridden from the environment, e.g.:
#   sudo TIKA_GIT_REF=TIKA-4345-v2 ./this-script.sh
TIKA_GIT_REF="${TIKA_GIT_REF:-main}"
echo "INFO: Preparing to download and compile Apache Tika..."
if [ -d "$SOURCE_DIR" ]; then
  echo "INFO: Removing existing Tika source directory to ensure a fresh clone."
  # ':?' aborts rather than expanding to an empty path if SOURCE_DIR is unset.
  rm -rf -- "${SOURCE_DIR:?}"
fi
echo "INFO: Cloning the '${TIKA_GIT_REF}' branch of Apache Tika source code from GitHub..."
# A shallow (--depth 1) clone only contains the ref named by -b, so the ref has
# to be chosen here; a later 'git checkout <other-branch>' could not succeed.
# The command stands alone (no '&&' chain) so that 'set -e' aborts on failure.
git clone --depth 1 -b "$TIKA_GIT_REF" https://github.com/apache/tika.git "$SOURCE_DIR"
echo "INFO: Compiling Apache Tika with Maven. This will take several minutes..."
# 'install' (rather than 'package') puts the module artifacts into the local
# Maven repository; unit tests are skipped via -DskipTests.
mvn -f "$SOURCE_DIR/pom.xml" install -DskipTests
echo "INFO: Compilation complete."
# 3. Start Packaging Process
# Recreate the staging tree from scratch: any directory left over from an
# earlier run is deleted, then the bin/, lib/ and conf/ subdirectories are made.
echo "INFO: Starting the Tika Server (Full-Featured) packaging process..."
if [ -d "$BUILD_DIR" ]; then
  echo "INFO: Removing existing temporary build directory: $BUILD_DIR"
  rm -rf "$BUILD_DIR"
fi
echo "INFO: Creating temporary build directory at '$BUILD_DIR'..."
for staging_subdir in bin lib conf; do
  mkdir -p "$BUILD_DIR/$staging_subdir"
done
# 4. Assemble the 'lib' directory with ALL necessary JARs
echo "INFO: Assembling application libraries..."
# Resolve the staging lib directory to an absolute path. Maven resolves a
# relative File parameter such as outputDirectory against the base directory of
# the module being built (selected here with -f/-pl), not against the shell's
# current directory, so a relative path would place the JARs inside the source
# tree instead of the staging area.
LIB_DIR_ABS="$(cd "$BUILD_DIR/lib" && pwd)"
# --- START: DEFINITIVE 2-STEP ASSEMBLY ---
# STEP 1: Copy all third-party dependencies from 'tika-app'.
echo "INFO: Step 1 of 2: Copying all third-party dependency JARs..."
mvn -f "$SOURCE_DIR/pom.xml" -pl org.apache.tika:tika-app \
  dependency:copy-dependencies -DoutputDirectory="$LIB_DIR_ABS"
# STEP 2: Find and copy all of Tika's own modules to ensure none are missed.
# Only built artifacts directly under a module's target/ directory are taken;
# test, source and javadoc JARs are excluded.
echo "INFO: Step 2 of 2: Copying all Tika-specific module JARs (including server components)..."
find "$SOURCE_DIR" -path "*/target/tika-*.jar" -type f \
  -not -name "*-tests.jar" \
  -not -name "*-sources.jar" \
  -not -name "*-javadoc.jar" \
  -exec cp -- {} "$LIB_DIR_ABS/" \;
# --- END: DEFINITIVE 2-STEP ASSEMBLY ---
# 5. Create the startup script in the 'bin' directory
# The heredoc delimiter is quoted ('EOF'), so nothing below is expanded while
# this installer runs; every $VAR is evaluated later, when the generated
# launcher itself is executed.
echo "INFO: Creating startup script at '$BUILD_DIR/bin/tika-server.sh'..."
cat << 'EOF' > "$BUILD_DIR/bin/tika-server.sh"
#!/bin/bash
#================================================
# Apache Tika Server Startup Script (Full-Featured)
#================================================
# Locate the install root relative to this script (<root>/bin/tika-server.sh).
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
INSTALL_DIR="$(dirname "$DIR")"
LIB_DIR="$INSTALL_DIR/lib"
CONF_DIR="$INSTALL_DIR/conf"
# Build a ':'-separated classpath from every JAR in lib/.
CLASSPATH=""
for jar in "$LIB_DIR"/*.jar; do
  CLASSPATH="$CLASSPATH:$jar"
done
CLASSPATH="${CLASSPATH#:}"
# The main class for the server is in the 'tika-server-core' module
TIKA_MAIN_CLASS="org.apache.tika.server.core.TikaServerCli"
echo "INFO: Starting Tika Server with full classpath..."
# Decide whether the caller already chose a config file. Each argument is
# inspected on its own so that '--config=<file>' and the short form '-c'
# (NOTE(review): '-c' is the short option per the Tika server CLI docs --
# confirm for the version being built) are recognised, and so that text inside
# another argument's value cannot be mistaken for the option.
HAS_CONFIG=0
for arg in "$@"; do
  case "$arg" in
    -c|--config|--config=*) HAS_CONFIG=1 ;;
  esac
done
# The service will pass its own JAVA_OPTS
# JAVA_OPTS is intentionally unquoted so that multiple JVM flags are split
# into separate arguments.
if [ "$HAS_CONFIG" -eq 0 ]; then
  exec java $JAVA_OPTS -cp "$CONF_DIR:$CLASSPATH" "$TIKA_MAIN_CLASS" --config "$CONF_DIR/tika-config.xml" "$@"
else
  exec java $JAVA_OPTS -cp "$CONF_DIR:$CLASSPATH" "$TIKA_MAIN_CLASS" "$@"
fi
EOF
chmod +x "$BUILD_DIR/bin/tika-server.sh"
# 6. Create the configuration file in the 'conf' directory
# The heredoc delimiter is unquoted, so ${SERVER_PORT} is substituted with the
# configured port while this installer runs.
# NOTE(review): confirm that the Tika version being built reads <port> directly
# under <server>; some Tika documentation shows server settings nested in a
# <params> element.
TIKA_CONFIG_FILE="$BUILD_DIR/conf/tika-config.xml"
echo "INFO: Creating config file at '$TIKA_CONFIG_FILE'..."
cat > "$TIKA_CONFIG_FILE" << EOF
<?xml version="1.0" encoding="UTF-8"?>
<properties>
<server>
<port>${SERVER_PORT}</port>
</server>
<parsers>
<parser class="org.apache.tika.parser.DefaultParser"/>
</parsers>
</properties>
EOF
echo
echo "--- Packaging Complete. Starting System Installation... ---"
# 7. Create a dedicated system user for the service
# 'id -u' exits non-zero when the account is unknown; its output is discarded
# because only the exit status matters here. A missing account is created as a
# system user with its own group and a non-login shell.
if ! id -u "$SERVICE_USER" > /dev/null 2>&1; then
  echo "INFO: Creating system user '$SERVICE_USER'..."
  useradd --system --shell /sbin/nologin --user-group "$SERVICE_USER"
else
  echo "INFO: User '$SERVICE_USER' already exists. Skipping creation."
fi
# 8. Install the application to the system directory
# Any previous installation is removed first, then the staged build tree is
# moved into place, handed over to the service account, and the launcher is
# marked executable.
if [ -d "$INSTALL_DIR" ]; then
  echo "INFO: Removing existing installation directory: $INSTALL_DIR"
  rm -rf "$INSTALL_DIR"
fi

echo "INFO: Installing application to '$INSTALL_DIR'..."
mv "$BUILD_DIR" "$INSTALL_DIR"

echo "INFO: Setting ownership and permissions..."
chown -R "${SERVICE_USER}:${SERVICE_USER}" "$INSTALL_DIR"
chmod +x "${INSTALL_DIR}/bin/tika-server.sh"
# 9. Configure Firewall
# Opens the server port using whichever supported firewall front end is found
# first (firewall-cmd, then ufw). When neither is present only a warning is
# printed. An existing rule is detected first so the port is not added twice.
FIREWALL_PORT_SPEC="${SERVER_PORT}/tcp"
if command -v firewall-cmd > /dev/null 2>&1; then
  echo "INFO: 'firewalld' detected. Configuring firewall..."
  if firewall-cmd --permanent --query-port="$FIREWALL_PORT_SPEC"; then
    echo "INFO: Port ${FIREWALL_PORT_SPEC} is already open."
  else
    firewall-cmd --permanent --add-port="$FIREWALL_PORT_SPEC"
    echo "INFO: Port ${FIREWALL_PORT_SPEC} opened."
    # Permanent rules only take effect after a reload.
    firewall-cmd --reload
  fi
elif command -v ufw > /dev/null 2>&1; then
  echo "INFO: 'ufw' detected. Configuring firewall..."
  if ufw status | grep -qw "$FIREWALL_PORT_SPEC"; then
    echo "INFO: Rule for port ${FIREWALL_PORT_SPEC} already exists."
  else
    ufw allow "$FIREWALL_PORT_SPEC"
    echo "INFO: Port ${FIREWALL_PORT_SPEC} allowed."
  fi
else
  echo "WARNING: No 'firewalld' or 'ufw' found. Please manually open TCP port ${SERVER_PORT}."
fi
# 10. Create and enable the systemd service
# Writes the unit file (the heredoc is unquoted, so the ${...} values are
# filled in now), reloads systemd so it sees the new unit, enables it at boot,
# and (re)starts it.
echo "INFO: Creating systemd service file at '/etc/systemd/system/${SERVICE_NAME}'..."
cat << EOF > "/etc/systemd/system/${SERVICE_NAME}"
[Unit]
Description=Apache Tika Server (Full-Featured)
After=network.target
[Service]
User=${SERVICE_USER}
Group=${SERVICE_USER}
Type=simple
WorkingDirectory=${INSTALL_DIR}
ExecStart=${INSTALL_DIR}/bin/tika-server.sh
Restart=on-failure
RestartSec=10
# Set Java memory to 32GB
Environment="JAVA_OPTS=-Xmx32g"
[Install]
WantedBy=multi-user.target
EOF
echo "INFO: Reloading systemd daemon..."
systemctl daemon-reload
echo "INFO: Enabling Tika server to start on boot..."
systemctl enable "$SERVICE_NAME"
echo "INFO: Starting Tika server now..."
# 'restart' instead of 'start': when this installer is re-run over an existing
# installation the service is usually still active, and 'start' would then do
# nothing, leaving the old process running from the replaced install tree.
# 'restart' starts the unit if it is stopped and replaces it if it is running.
systemctl restart "$SERVICE_NAME"
# 11. Final cleanup
# Delete the cloned source tree, then print a summary with the install
# location, the unit name, and the commands for checking status and logs.
echo "INFO: Removing Tika source code directory to save space..."
rm -rf "$SOURCE_DIR"
cat << EOF

--- Installation Complete ---
A full-featured Tika Server has been installed and started as a service.

Installation Directory: ${INSTALL_DIR}
Service Name: ${SERVICE_NAME}

You can check the service status with:
sudo systemctl status ${SERVICE_NAME}

To view logs, you can use:
sudo journalctl -u ${SERVICE_NAME} -f
---------------------------------
EOF
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment