Last active
July 14, 2025 19:25
-
-
Save sycomix/51a1ca264abe77673636b7201401dbd6 to your computer and use it in GitHub Desktop.
tika build and install server from source
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#==============================================================================
# Download, Compile, and Install Apache Tika Server (Definitive Edition)
#
# This script fully automates the setup of a full-featured Apache Tika server.
# It uses a two-step process (third-party dependencies first, then Tika's own
# module JARs) to package the server before installing the application as a
# system service.
#
# Prerequisites:
# - This script must be run with root privileges (e.g., using 'sudo').
# - 'git' and 'maven' must be installed on the system.
#
#==============================================================================

# --- Configuration ---
# Strict mode: abort on an unhandled command failure (-e), on use of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Directory (relative to the current working directory) the Tika sources are
# cloned into. It is deleted and re-created on every run.
readonly SOURCE_DIR="tika"
# Temporary staging directory (bin/, lib/, conf/) that is later moved to
# INSTALL_DIR.
readonly BUILD_DIR="tika_server_build"
# Final installation location; any existing directory there is replaced.
readonly INSTALL_DIR="/opt/tika-server-full"
# System account (and group of the same name) that owns and runs the service.
readonly SERVICE_USER="tika"
# File name of the systemd unit written to /etc/systemd/system/.
readonly SERVICE_NAME="tika-server.service"
# TCP port written into tika-config.xml and opened in the firewall.
readonly SERVER_PORT="9998"
# --- Script Body ---
# 1. Privilege and Dependency Checks

# Exit with an installation hint unless the command $1 is available on PATH.
#   $1 - executable to look for
#   $2 - package name shown in the error message
require_tool() {
  local binary="$1" package="$2"
  if command -v "$binary" &> /dev/null; then
    return 0
  fi
  echo "ERROR: '${package}' is not installed. Please install it first (e.g., 'sudo apt install ${package}')."
  exit 1
}

echo "INFO: Checking prerequisites..."
# Later steps create a user, write under /opt and /etc, and call systemctl,
# so anything other than UID 0 is rejected up front.
[ "$(id -u)" -eq 0 ] || {
  echo "ERROR: This script must be run as root or with sudo."
  exit 1
}
require_tool git git
require_tool mvn maven
echo "INFO: Prerequisites met."
# 2. Download and Compile Apache Tika
# Branch or tag to build. Defaults to 'main'; override from the environment,
# e.g. TIKA_BRANCH=TIKA-4345-v2.
TIKA_BRANCH="${TIKA_BRANCH:-main}"

echo "INFO: Preparing to download and compile Apache Tika..."
if [ -d "$SOURCE_DIR" ]; then
  echo "INFO: Removing existing Tika source directory to ensure a fresh clone."
  # ':?' aborts rather than expanding to an empty path if SOURCE_DIR is unset.
  rm -rf -- "${SOURCE_DIR:?}"
fi

echo "INFO: Cloning the '${TIKA_BRANCH}' branch of Apache Tika source code from GitHub..."
# Clone the wanted ref directly. A shallow clone (--depth 1) only fetches the
# ref named by -b, so switching to a different branch afterwards with
# 'git checkout' cannot work. As a plain command (not part of an '&&' list),
# a clone failure now stops the script under 'set -e'.
git clone --depth 1 -b "$TIKA_BRANCH" https://github.com/apache/tika.git "$SOURCE_DIR"

echo "INFO: Compiling Apache Tika with Maven. This will take several minutes..."
# 'install' (rather than 'package') puts the module artifacts into the local
# Maven repository; tests are skipped to shorten the build.
mvn -f "$SOURCE_DIR/pom.xml" install -DskipTests
echo "INFO: Compilation complete."
# 3. Start Packaging Process
echo "INFO: Starting the Tika Server (Full-Featured) packaging process..."

# Always begin with an empty staging area: drop whatever a previous run left.
if [[ -d "$BUILD_DIR" ]]; then
  echo "INFO: Removing existing temporary build directory: $BUILD_DIR"
  rm -rf "$BUILD_DIR"
fi

echo "INFO: Creating temporary build directory at '$BUILD_DIR'..."
# Layout of the package: launcher script, JARs, and configuration.
for subdir in bin lib conf; do
  mkdir -p "$BUILD_DIR/$subdir"
done
# 4. Assemble the 'lib' directory with ALL necessary JARs
lib_dir="$BUILD_DIR/lib"
echo "INFO: Assembling application libraries..."

# --- START: DEFINITIVE 2-STEP ASSEMBLY ---
# STEP 1: Let Maven copy every dependency of the 'tika-app' module into lib/.
echo "INFO: Step 1 of 2: Copying all third-party dependency JARs..."
mvn_copy_args=(
  -f "$SOURCE_DIR/pom.xml"
  -pl org.apache.tika:tika-app
  dependency:copy-dependencies
  -DoutputDirectory="$lib_dir"
)
mvn "${mvn_copy_args[@]}"

# STEP 2: Sweep the source tree for Tika's own build outputs (tika-*.jar under
# any target/ directory), leaving out test, source, and javadoc archives.
echo "INFO: Step 2 of 2: Copying all Tika-specific module JARs (including server components)..."
find "$SOURCE_DIR" -type f -path "*/target/tika-*.jar" \
  ! -name "*-tests.jar" \
  ! -name "*-sources.jar" \
  ! -name "*-javadoc.jar" \
  -exec cp {} "$lib_dir/" \;
# --- END: DEFINITIVE 2-STEP ASSEMBLY ---
# 5. Create the startup script in the 'bin' directory
echo "INFO: Creating startup script at '$BUILD_DIR/bin/tika-server.sh'..."
# The delimiter is quoted ('EOF') so nothing below is expanded now; every
# variable is resolved when the generated launcher itself runs.
cat << 'EOF' > "$BUILD_DIR/bin/tika-server.sh"
#!/bin/bash
#================================================
# Apache Tika Server Startup Script (Full-Featured)
#================================================
# Resolve the installation root from this script's own location (<root>/bin).
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
INSTALL_DIR="$(dirname "$DIR")"
LIB_DIR="$INSTALL_DIR/lib"
CONF_DIR="$INSTALL_DIR/conf"

# Build the classpath from every JAR in lib/.
CLASSPATH=""
for jar in "$LIB_DIR"/*.jar; do
  # When the glob matches nothing it stays literal; skip that non-existent path.
  [ -e "$jar" ] || continue
  CLASSPATH="$CLASSPATH:$jar"
done
CLASSPATH="${CLASSPATH#:}"
if [ -z "$CLASSPATH" ]; then
  echo "ERROR: No JAR files found in '$LIB_DIR'." >&2
  exit 1
fi

# The main class for the server is in the 'tika-server-core' module
TIKA_MAIN_CLASS="org.apache.tika.server.core.TikaServerCli"

# Pass the bundled configuration unless the caller already selected one,
# whether as '-c FILE', '--config FILE', or '--config=FILE'.
config_args=(--config "$CONF_DIR/tika-config.xml")
for arg in "$@"; do
  case "$arg" in
    -c | --config | --config=*) config_args=() ;;
  esac
done

echo "INFO: Starting Tika Server with full classpath..."
# The service will pass its own JAVA_OPTS. It is left unquoted on purpose so
# that several JVM flags split into separate arguments.
exec java $JAVA_OPTS -cp "$CONF_DIR:$CLASSPATH" "$TIKA_MAIN_CLASS" "${config_args[@]}" "$@"
EOF
chmod +x "$BUILD_DIR/bin/tika-server.sh"
# 6. Create the configuration file in the 'conf' directory
config_file="$BUILD_DIR/conf/tika-config.xml"
echo "INFO: Creating config file at '$config_file'..."
# Emit the XML line by line; only the <port> element carries a runtime value.
# NOTE(review): confirm that the Tika version being built reads the port from
# <server><port>; this layout has not been verified against its config schema.
{
  printf '%s\n' '<?xml version="1.0" encoding="UTF-8"?>'
  printf '%s\n' '<properties>' '<server>'
  printf '<port>%s</port>\n' "$SERVER_PORT"
  printf '%s\n' '</server>' '<parsers>'
  printf '%s\n' '<parser class="org.apache.tika.parser.DefaultParser"/>'
  printf '%s\n' '</parsers>' '</properties>'
} > "$config_file"
echo
echo "--- Packaging Complete. Starting System Installation... ---"
# 7. Create a dedicated system user for the service
# 'id -u' succeeds only for an existing account, so a failure means the
# account still has to be created (system account, no login shell, own group).
if ! id -u "$SERVICE_USER" > /dev/null 2>&1; then
  echo "INFO: Creating system user '$SERVICE_USER'..."
  useradd --system --shell /sbin/nologin --user-group "$SERVICE_USER"
else
  echo "INFO: User '$SERVICE_USER' already exists. Skipping creation."
fi
# 8. Install the application to the system directory
# A previous installation is replaced wholesale rather than merged into.
if [[ -d "$INSTALL_DIR" ]]; then
  echo "INFO: Removing existing installation directory: $INSTALL_DIR"
  rm -rf "$INSTALL_DIR"
fi

echo "INFO: Installing application to '$INSTALL_DIR'..."
mv "$BUILD_DIR" "$INSTALL_DIR"

echo "INFO: Setting ownership and permissions..."
launcher="$INSTALL_DIR/bin/tika-server.sh"
owner="${SERVICE_USER}:${SERVICE_USER}"
# Hand the whole tree to the service account and keep the launcher executable.
chown -R "$owner" "$INSTALL_DIR"
chmod +x "$launcher"
# 9. Configure Firewall
# Opens SERVER_PORT/tcp in whichever supported firewall front end is usable.
# firewalld is only used when its daemon is actually running: with the daemon
# stopped, 'firewall-cmd --add-port' fails and 'set -e' would abort the whole
# installation.
if command -v firewall-cmd &> /dev/null && firewall-cmd --state &> /dev/null; then
  echo "INFO: 'firewalld' detected. Configuring firewall..."
  if ! firewall-cmd --permanent --query-port="${SERVER_PORT}/tcp"; then
    firewall-cmd --permanent --add-port="${SERVER_PORT}/tcp"
    echo "INFO: Port ${SERVER_PORT}/tcp opened."
    # Permanent rules only take effect after a reload.
    firewall-cmd --reload
  else
    echo "INFO: Port ${SERVER_PORT}/tcp is already open."
  fi
elif command -v ufw &> /dev/null; then
  echo "INFO: 'ufw' detected. Configuring firewall..."
  # Capture the rule listing first instead of piping into 'grep -q', which
  # can exit early and make the pipeline look failed. A failing 'ufw status'
  # is tolerated and simply treated as "rule not present".
  ufw_rules="$(ufw status || true)"
  if ! grep -qw -- "${SERVER_PORT}/tcp" <<< "$ufw_rules"; then
    ufw allow "${SERVER_PORT}/tcp"
    echo "INFO: Port ${SERVER_PORT}/tcp allowed."
  else
    echo "INFO: Rule for port ${SERVER_PORT}/tcp already exists."
  fi
else
  echo "WARNING: No running 'firewalld' or 'ufw' found. Please manually open TCP port ${SERVER_PORT}."
fi
# 10. Create and enable the systemd service
echo "INFO: Creating systemd service file at '/etc/systemd/system/${SERVICE_NAME}'..."
# Unquoted delimiter: the ${...} references below are expanded now, so the
# unit file contains the concrete user, paths, and names.
cat << EOF > "/etc/systemd/system/${SERVICE_NAME}"
[Unit]
Description=Apache Tika Server (Full-Featured)
After=network.target
[Service]
User=${SERVICE_USER}
Group=${SERVICE_USER}
Type=simple
WorkingDirectory=${INSTALL_DIR}
ExecStart=${INSTALL_DIR}/bin/tika-server.sh
Restart=on-failure
RestartSec=10
# Set Java memory to 32GB
Environment="JAVA_OPTS=-Xmx32g"
[Install]
WantedBy=multi-user.target
EOF
echo "INFO: Reloading systemd daemon..."
systemctl daemon-reload
echo "INFO: Enabling Tika server to start on boot..."
systemctl enable "$SERVICE_NAME"
echo "INFO: Starting Tika server now..."
# 'restart' instead of 'start': on a re-install the unit may already be
# running the previous (now deleted) files, and 'start' would leave that old
# process in place. 'restart' also starts a unit that is not running yet.
systemctl restart "$SERVICE_NAME"
# 11. Final cleanup
echo "INFO: Removing Tika source code directory to save space..."
rm -rf "$SOURCE_DIR"

# Closing summary, printed as one here-document; the ${...} references are
# expanded so the hints show the real directory and unit name.
cat << SUMMARY

--- Installation Complete ---
A full-featured Tika Server has been installed and started as a service.

Installation Directory: ${INSTALL_DIR}
Service Name: ${SERVICE_NAME}

You can check the service status with:
sudo systemctl status ${SERVICE_NAME}

To view logs, you can use:
sudo journalctl -u ${SERVICE_NAME} -f
---------------------------------
SUMMARY
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment