linuxmalaysia · May 20, 2025 23:20 · linuxmalaysia · May 20, 2025 · linuxmalaysia · May 20, 2025
diff --git a/check_deletable_verbose_robust.sh b/check_deletable_verbose_robust.sh
 #!/bin/bash
 # By Harisfazillah Jamel with Google Gemini Help
 # 20250521
 #
 # This script is a robust Elasticsearch index assessment tool designed
 # to help you identify and manage old or inactive indices for .ds-logs and .ds-metrics.

 # Configuration - override via the environment or adjust the defaults below.
 ES_HOST="${ES_HOST:-localhost:9200}"                          # host:port of the Elasticsearch HTTP endpoint
 CERT_PATH="${CERT_PATH:-/etc/elasticsearch/certs/http_ca.crt}" # Path to your CA certificate

 # --- Define Output File ---
 # The real name embeds TARGET_MONTH_YEAR, which is not known until the user
 # has been prompted; leave it empty here instead of building a bogus
 # "deletable_indices_.txt" from an unset variable.
 OUTPUT_FILE=""

 # --- WARNING AND DISCLAIMER ---
 # Emit the whole banner with a single printf: one argument per output line,
 # an empty argument for each blank line.
 printf '%s\n' \
    "**************************************************************************************************" \
    "WARNING: This script identifies indices that *may* be deletable based on common criteria." \
    "         However, it's crucial that you **MANUALLY VERIFY** the purpose and contents of" \
    "         each listed index before proceeding with any deletion. Indices related to" \
    "         Elasticsearch's internal functions, Kibana, or specific applications might" \
    "         be flagged as 'DELETABLE' but are essential for your cluster's operation." \
    "" \
    "         **PROCEED WITH EXTREME CAUTION.**" \
    "         We are not responsible for any data loss or operational issues resulting" \
    "         from the deletion of indices identified by this script." \
    "**************************************************************************************************" \
    ""

 # --- Prompt for Username and Password ---
 # -r keeps backslashes literal; IFS= on the password additionally keeps
 # leading/trailing blanks, so the secret reaches curl exactly as typed.
 read -r -p "Enter Elasticsearch username: " ES_USER
 IFS= read -r -s -p "Enter Elasticsearch password: " ES_PASS
 echo # Add a newline after password input

 # --- User Input for Month/Year ---
 read -r -p "Enter the year and month to check (e.g., 2025.01): " TARGET_MONTH_YEAR

 if [[ -z "$TARGET_MONTH_YEAR" ]]; then
    echo "Year and month cannot be empty. Exiting." >&2
    exit 1
 fi

 # The value is used both to filter index names and as part of the output
 # filename, so accept only YYYY, YYYY.MM or YYYY.MM.DD. This rejects path
 # separators and pattern metacharacters before they can reach either use.
 target_month_year_re='^[0-9]{4}([.][0-9]{2}){0,2}$'
 if [[ ! "$TARGET_MONTH_YEAR" =~ $target_month_year_re ]]; then
    echo "Invalid value '${TARGET_MONTH_YEAR}': expected YYYY.MM (e.g., 2025.01). Exiting." >&2
    exit 1
 fi

 # Set the output filename dynamically based on user input
 OUTPUT_FILE="deletable_indices_${TARGET_MONTH_YEAR}.txt"

 # --- Check if CA certificate exists ---
 if [[ ! -f "$CERT_PATH" ]]; then
    echo "Error: CA certificate not found at '$CERT_PATH'." >&2
    echo "Please ensure the path is correct and the file exists." >&2
    exit 1
 fi

 #######################################
 # Send one authenticated HTTPS request to Elasticsearch and print the
 # response body.
 # Globals:   CERT_PATH, ES_USER, ES_PASS, ES_HOST (all read)
 # Arguments: $1 - HTTP method handed to curl -X
 #            $2 - endpoint appended to https://${ES_HOST} (path + query)
 #            $3 - optional request body; when non-empty it is sent with a
 #                 JSON Content-Type header
 # Outputs:   response body on stdout; curl's own error text on stderr
 # Returns:   curl's exit status
 #######################################
 execute_curl() {
    local method="$1"
    local endpoint="$2"
    local data="${3:-}" # Optional data for POST/PUT requests

    # -S keeps the progress meter off (-s) but still reports transport/TLS
    # failures on stderr; without it a failed request is indistinguishable
    # from an empty result.
    # NOTE(review): -u places the password on curl's command line, where it
    # is visible to other local users via ps; consider feeding it through a
    # curl config on stdin if that matters in your environment.
    local -a curl_args=(
        -s -S
        --cacert "${CERT_PATH}"
        -u "${ES_USER}:${ES_PASS}"
        -X "${method}"
        "https://${ES_HOST}${endpoint}"
    )

    if [[ -n "$data" ]]; then
        curl_args+=(-H 'Content-Type: application/json' -d "${data}")
    fi

    curl "${curl_args[@]}"
 }

 #######################################
 # Convert a byte count into a short human-readable size string.
 # Arguments: $1 - size in bytes (non-negative number)
 # Outputs:   "<n>b" below 1024, otherwise a one-decimal KB / MB / GB value
 #            (1024-based units); "N/A" when $1 is empty or not a number
 # Returns:   0
 #######################################
 convert_bytes_readable() {
    local bytes="${1:-}"
    local -r numeric_re='^[0-9]+([.][0-9]+)?$'

    # Callers pass a column cut out of an API response; when that response is
    # blank or an error payload the value is not a number, so report N/A
    # instead of feeding garbage into the arithmetic below.
    if [[ ! "$bytes" =~ $numeric_re ]]; then
        echo "N/A"
        return 0
    fi

    # One awk process picks the unit and formats the value; awk is already
    # used throughout this script, so no extra tool (bc) is required.
    awk -v b="$bytes" 'BEGIN {
        n = b + 0
        if (n >= 1073741824)   printf "%.1fGB", n / 1073741824
        else if (n >= 1048576) printf "%.1fMB", n / 1048576
        else if (n >= 1024)    printf "%.1fKB", n / 1024
        else                   printf "%sb\n", b
    }'
 }


 echo ""
 echo "--- Starting detailed index check for ${TARGET_MONTH_YEAR} ---"
 echo "Processing Format: Index Name | Docs Count | Store Size | Creation Date [STATUS]"
 echo "------------------------------------------------------------------------------------------------------"

 # Array to store deletable index names
 DELETABLE_INDICES=()

 # Backing-index naming scheme assumed here: .ds-<data_stream>-YYYY.MM.DD-NNNNNN
 # Capture group 1 is the data stream name.
 backing_index_re='^\.ds-(.+)-[0-9]{4}\.[0-9]{2}\.[0-9]{2}-[0-9]{6}$'

 # Get ALL indices and then filter them for the target month/year AND the desired prefixes
 all_indices_raw=$(execute_curl GET "/_cat/indices?h=index,docs.count,store.size,creation.date.string&s=index&bytes=b")

 # Keep only rows whose index NAME (column 1) starts with .ds-logs- or
 # .ds-metrics- and contains the requested period as literal text. Matching
 # the name only, and literally, stops "2025.01" from matching other columns
 # such as a "2025-01-..." creation date through the regex wildcard ".".
 # The value is passed through the environment so awk does not reinterpret
 # backslashes in it.
 indices_output=$(printf '%s\n' "$all_indices_raw" \
    | TARGET="$TARGET_MONTH_YEAR" awk '($1 ~ /^\.ds-(logs|metrics)-/) && (index($1, ENVIRON["TARGET"]) > 0)')

 if [[ -z "$indices_output" ]]; then
    echo "No indices found matching '${TARGET_MONTH_YEAR}' with '.ds-logs-' or '.ds-metrics-' prefix."
    echo "------------------------------------------------------------------------------------------------------"
 else
    # Use process substitution <() for the while loop to preserve array changes.
    # read splits the four columns directly; "_" swallows anything extra.
    while read -r index_name docs_count store_size_bytes creation_date _; do
        # Skip blank rows and potential header lines if any
        if [[ -z "$index_name" || "$index_name" == index* ]]; then
            continue
        fi

        # Convert bytes to a more readable format (KB, MB, GB)
        store_size_readable=$(convert_bytes_readable "$store_size_bytes")

        status_message="UNKNOWN"

        if [[ "$index_name" != ".ds-"* ]]; then
            # Not a data stream backing index. The prefix filter above only
            # lets .ds- names through, so this branch is kept as a fallback.
            status_message="REGULAR INDEX (DELETABLE)"
            DELETABLE_INDICES+=("$index_name") # Add to array
        elif [[ "$index_name" =~ $backing_index_re ]]; then
            data_stream_name_parsed="${BASH_REMATCH[1]}"
            data_stream_info=$(execute_curl GET "/_data_stream/${data_stream_name_parsed}")

            # The write index is read as the last entry of the stream's
            # "indices" list. jq yields nothing for an error payload, an
            # empty body or non-JSON text; its stderr is silenced on purpose
            # because every such case is handled by the empty check below.
            write_index=$(jq -r '.data_streams[0].indices[-1].index_name // empty' \
                <<<"$data_stream_info" 2>/dev/null)

            if [[ -z "$write_index" ]]; then
                # Without a known write index we cannot prove this index is
                # safe to remove, so it must never be listed as deletable.
                status_message="DS CHECK FAILED (API Error)"
            elif [[ "$index_name" == "$write_index" ]]; then
                status_message="DS - WRITE INDEX (CANNOT DELETE)"
            else
                status_message="DS - NOT WRITE INDEX (DELETABLE)"
                DELETABLE_INDICES+=("$index_name") # Add to array
            fi
        else
            # Name does not follow the expected backing-index pattern, so the
            # owning data stream is unknown; report it but do not list it.
            status_message="DS - PARSING FAILED (SKIPPED)"
        fi

        printf "%-70s | %10s | %10s | %s [%s]\n" \
               "$index_name" "$docs_count" "$store_size_readable" "$creation_date" "$status_message"
    done < <(printf '%s\n' "$indices_output")
 fi

 echo "------------------------------------------------------------------------------------------------------"
 echo ""
 echo "--- SUMMARY: Indices identified as potentially DELETABLE for ${TARGET_MONTH_YEAR} ---"
 echo "(These are indices that are NOT data stream write indices)"
 echo "-------------------------------------------------------------------------------------"

 if [[ ${#DELETABLE_INDICES[@]} -eq 0 ]]; then
    # Overwrite rather than append: a list left behind by an earlier run for
    # the same period must not survive a run that found nothing.
    echo "No potentially deletable indices found for ${TARGET_MONTH_YEAR}." | tee "$OUTPUT_FILE"
 else
    # Output header to screen and file (tee without -a starts a fresh file)
    echo "Index Name | Docs Count | Store Size | Creation Date" | tee "$OUTPUT_FILE"
    echo "-------------------------------------------------------------------------------------" | tee -a "$OUTPUT_FILE"

    # Loop through each deletable index and fetch its info individually for precise formatting
    for idx_name in "${DELETABLE_INDICES[@]}"; do
        # Use specific columns for clarity and consistency
        idx_info=$(execute_curl GET "/_cat/indices/${idx_name}?h=index,docs.count,store.size,creation.date.string&bytes=b")

        # Split the first response line into its four columns.
        read -r summary_index_name summary_docs_count summary_store_size_bytes summary_creation_date _ <<<"$idx_info"

        # A valid row starts with the index that was asked for; an empty body
        # or an error payload does not, and is reported as N/A instead of
        # being printed as if it were data.
        if [[ "$summary_index_name" == "$idx_name" ]]; then
            summary_store_size_readable=$(convert_bytes_readable "$summary_store_size_bytes")

            # Print to screen AND append to the file
            printf "%-50s | %10s | %10s | %s\n" \
                   "$summary_index_name" "$summary_docs_count" "$summary_store_size_readable" "$summary_creation_date" | tee -a "$OUTPUT_FILE"
        else
            printf "%-50s | %10s | %10s | %s\n" "$idx_name" "N/A" "N/A" "N/A (Error fetching details)" | tee -a "$OUTPUT_FILE"
        fi
    done
    echo "" | tee -a "$OUTPUT_FILE" # Add a newline at the end of the list in the file
    echo "Deletable index names also saved to: $OUTPUT_FILE"
 fi

 # Closing reminders, emitted with one printf (one argument per output line).
 printf '%s\n' \
    "-------------------------------------------------------------------------------------" \
    "IMPORTANT NOTES:" \
    "1. Data streams with 'DS - WRITE INDEX' cannot be deleted directly." \
    "2. Indices marked 'DELETABLE' can be deleted, but always confirm actual data needs." \
    "3. For '.ds-' indices, configure and rely on ILM for automated deletion if possible." \
    "4. The 'docs.count' and 'store.size' columns help identify truly empty or small indices." \
    "5. Always **MANUALLY VERIFY** indices before deletion, especially those related to Elasticsearch's internal functions or Kibana. We are not responsible for data loss." \
    "-------------------------------------------------------------------------------------"
	#!/bin/bash
	# By Harisfazillah Jamel with Google Gemini Help
	# 20250521
	#
	# This script is a robust Elasticsearch index assessment tool designed
	# to help you identify and manage old or inactive indices for .ds-logs and .ds-metrics.

	# Configuration - override via the environment or adjust the defaults below.
	ES_HOST="${ES_HOST:-localhost:9200}"                          # host:port of the Elasticsearch HTTP endpoint
	CERT_PATH="${CERT_PATH:-/etc/elasticsearch/certs/http_ca.crt}" # Path to your CA certificate

	# --- Define Output File ---
	# The real name embeds TARGET_MONTH_YEAR, which is not known until the user
	# has been prompted; leave it empty here instead of building a bogus
	# "deletable_indices_.txt" from an unset variable.
	OUTPUT_FILE=""

	# --- WARNING AND DISCLAIMER ---
	# Emit the whole banner with a single printf: one argument per output line,
	# an empty argument for each blank line.
	printf '%s\n' \
	    "**************************************************************************************************" \
	    "WARNING: This script identifies indices that may be deletable based on common criteria." \
	    " However, it's crucial that you MANUALLY VERIFY the purpose and contents of" \
	    " each listed index before proceeding with any deletion. Indices related to" \
	    " Elasticsearch's internal functions, Kibana, or specific applications might" \
	    " be flagged as 'DELETABLE' but are essential for your cluster's operation." \
	    "" \
	    " PROCEED WITH EXTREME CAUTION." \
	    " We are not responsible for any data loss or operational issues resulting" \
	    " from the deletion of indices identified by this script." \
	    "**************************************************************************************************" \
	    ""

	# --- Prompt for Username and Password ---
	# -r keeps backslashes literal; IFS= on the password additionally keeps
	# leading/trailing blanks, so the secret reaches curl exactly as typed.
	read -r -p "Enter Elasticsearch username: " ES_USER
	IFS= read -r -s -p "Enter Elasticsearch password: " ES_PASS
	echo # Add a newline after password input

	# --- User Input for Month/Year ---
	read -r -p "Enter the year and month to check (e.g., 2025.01): " TARGET_MONTH_YEAR

	if [[ -z "$TARGET_MONTH_YEAR" ]]; then
	    echo "Year and month cannot be empty. Exiting." >&2
	    exit 1
	fi

	# The value is used both to filter index names and as part of the output
	# filename, so accept only YYYY, YYYY.MM or YYYY.MM.DD. This rejects path
	# separators and pattern metacharacters before they can reach either use.
	target_month_year_re='^[0-9]{4}([.][0-9]{2}){0,2}$'
	if [[ ! "$TARGET_MONTH_YEAR" =~ $target_month_year_re ]]; then
	    echo "Invalid value '${TARGET_MONTH_YEAR}': expected YYYY.MM (e.g., 2025.01). Exiting." >&2
	    exit 1
	fi

	# Set the output filename dynamically based on user input
	OUTPUT_FILE="deletable_indices_${TARGET_MONTH_YEAR}.txt"

	# --- Check if CA certificate exists ---
	if [[ ! -f "$CERT_PATH" ]]; then
	    echo "Error: CA certificate not found at '$CERT_PATH'." >&2
	    echo "Please ensure the path is correct and the file exists." >&2
	    exit 1
	fi

	#######################################
	# Send one authenticated HTTPS request to Elasticsearch and print the
	# response body.
	# Globals:   CERT_PATH, ES_USER, ES_PASS, ES_HOST (all read)
	# Arguments: $1 - HTTP method handed to curl -X
	#            $2 - endpoint appended to https://${ES_HOST} (path + query)
	#            $3 - optional request body; when non-empty it is sent with a
	#                 JSON Content-Type header
	# Outputs:   response body on stdout; curl's own error text on stderr
	# Returns:   curl's exit status
	#######################################
	execute_curl() {
	    local method="$1"
	    local endpoint="$2"
	    local data="${3:-}" # Optional data for POST/PUT requests

	    # -S keeps the progress meter off (-s) but still reports transport/TLS
	    # failures on stderr; without it a failed request is indistinguishable
	    # from an empty result.
	    # NOTE(review): -u places the password on curl's command line, where it
	    # is visible to other local users via ps; consider feeding it through a
	    # curl config on stdin if that matters in your environment.
	    local -a curl_args=(
	        -s -S
	        --cacert "${CERT_PATH}"
	        -u "${ES_USER}:${ES_PASS}"
	        -X "${method}"
	        "https://${ES_HOST}${endpoint}"
	    )

	    if [[ -n "$data" ]]; then
	        curl_args+=(-H 'Content-Type: application/json' -d "${data}")
	    fi

	    curl "${curl_args[@]}"
	}

	#######################################
	# Convert a byte count into a short human-readable size string.
	# Arguments: $1 - size in bytes (non-negative number)
	# Outputs:   "<n>b" below 1024, otherwise a one-decimal KB / MB / GB value
	#            (1024-based units); "N/A" when $1 is empty or not a number
	# Returns:   0
	#######################################
	convert_bytes_readable() {
	    local bytes="${1:-}"
	    local -r numeric_re='^[0-9]+([.][0-9]+)?$'

	    # Callers pass a column cut out of an API response; when that response is
	    # blank or an error payload the value is not a number, so report N/A
	    # instead of feeding garbage into the arithmetic below.
	    if [[ ! "$bytes" =~ $numeric_re ]]; then
	        echo "N/A"
	        return 0
	    fi

	    # One awk process picks the unit and formats the value. This replaces
	    # the earlier echo-to-bc pipelines, whose pipe characters were escaped
	    # (\|) and therefore never reached bc at all.
	    awk -v b="$bytes" 'BEGIN {
	        n = b + 0
	        if (n >= 1073741824)   printf "%.1fGB", n / 1073741824
	        else if (n >= 1048576) printf "%.1fMB", n / 1048576
	        else if (n >= 1024)    printf "%.1fKB", n / 1024
	        else                   printf "%sb\n", b
	    }'
	}


	echo ""
	echo "--- Starting detailed index check for ${TARGET_MONTH_YEAR} ---"
	echo "Processing Format: Index Name | Docs Count | Store Size | Creation Date [STATUS]"
	echo "------------------------------------------------------------------------------------------------------"

	# Array to store deletable index names
	DELETABLE_INDICES=()

	# Backing-index naming scheme assumed here: .ds-<data_stream>-YYYY.MM.DD-NNNNNN
	# Capture group 1 is the data stream name.
	backing_index_re='^\.ds-(.+)-[0-9]{4}\.[0-9]{2}\.[0-9]{2}-[0-9]{6}$'

	# Get ALL indices and then filter them for the target month/year AND the desired prefixes
	all_indices_raw=$(execute_curl GET "/_cat/indices?h=index,docs.count,store.size,creation.date.string&s=index&bytes=b")

	# Keep only rows whose index NAME (column 1) starts with .ds-logs- or
	# .ds-metrics- and contains the requested period as literal text. Matching
	# the name only, and literally, stops "2025.01" from matching other columns
	# such as a "2025-01-..." creation date through the regex wildcard ".".
	# The value is passed through the environment so awk does not reinterpret
	# backslashes in it.
	indices_output=$(printf '%s\n' "$all_indices_raw" \
	    | TARGET="$TARGET_MONTH_YEAR" awk '($1 ~ /^\.ds-(logs|metrics)-/) && (index($1, ENVIRON["TARGET"]) > 0)')

	if [[ -z "$indices_output" ]]; then
	    echo "No indices found matching '${TARGET_MONTH_YEAR}' with '.ds-logs-' or '.ds-metrics-' prefix."
	    echo "------------------------------------------------------------------------------------------------------"
	else
	    # Use process substitution <() for the while loop to preserve array changes.
	    # read splits the four columns directly; "_" swallows anything extra.
	    while read -r index_name docs_count store_size_bytes creation_date _; do
	        # Skip blank rows and potential header lines if any
	        if [[ -z "$index_name" || "$index_name" == index* ]]; then
	            continue
	        fi

	        # Convert bytes to a more readable format (KB, MB, GB)
	        store_size_readable=$(convert_bytes_readable "$store_size_bytes")

	        status_message="UNKNOWN"

	        if [[ "$index_name" != ".ds-"* ]]; then
	            # Not a data stream backing index. The prefix filter above only
	            # lets .ds- names through, so this branch is kept as a fallback.
	            status_message="REGULAR INDEX (DELETABLE)"
	            DELETABLE_INDICES+=("$index_name") # Add to array
	        elif [[ "$index_name" =~ $backing_index_re ]]; then
	            data_stream_name_parsed="${BASH_REMATCH[1]}"
	            data_stream_info=$(execute_curl GET "/_data_stream/${data_stream_name_parsed}")

	            # The write index is read as the last entry of the stream's
	            # "indices" list. jq yields nothing for an error payload, an
	            # empty body or non-JSON text; its stderr is silenced on purpose
	            # because every such case is handled by the empty check below.
	            write_index=$(jq -r '.data_streams[0].indices[-1].index_name // empty' \
	                <<<"$data_stream_info" 2>/dev/null)

	            if [[ -z "$write_index" ]]; then
	                # Without a known write index we cannot prove this index is
	                # safe to remove, so it must never be listed as deletable.
	                status_message="DS CHECK FAILED (API Error)"
	            elif [[ "$index_name" == "$write_index" ]]; then
	                status_message="DS - WRITE INDEX (CANNOT DELETE)"
	            else
	                status_message="DS - NOT WRITE INDEX (DELETABLE)"
	                DELETABLE_INDICES+=("$index_name") # Add to array
	            fi
	        else
	            # Name does not follow the expected backing-index pattern, so the
	            # owning data stream is unknown; report it but do not list it.
	            status_message="DS - PARSING FAILED (SKIPPED)"
	        fi

	        printf "%-70s | %10s | %10s | %s [%s]\n" \
	               "$index_name" "$docs_count" "$store_size_readable" "$creation_date" "$status_message"
	    done < <(printf '%s\n' "$indices_output")
	fi

	echo "------------------------------------------------------------------------------------------------------"
	echo ""
	echo "--- SUMMARY: Indices identified as potentially DELETABLE for ${TARGET_MONTH_YEAR} ---"
	echo "(These are indices that are NOT data stream write indices)"
	echo "-------------------------------------------------------------------------------------"

	if [[ ${#DELETABLE_INDICES[@]} -eq 0 ]]; then
	    # Overwrite rather than append: a list left behind by an earlier run for
	    # the same period must not survive a run that found nothing.
	    echo "No potentially deletable indices found for ${TARGET_MONTH_YEAR}." | tee "$OUTPUT_FILE"
	else
	    # Output header to screen and file (tee without -a starts a fresh file)
	    echo "Index Name | Docs Count | Store Size | Creation Date" | tee "$OUTPUT_FILE"
	    echo "-------------------------------------------------------------------------------------" | tee -a "$OUTPUT_FILE"

	    # Loop through each deletable index and fetch its info individually for precise formatting
	    for idx_name in "${DELETABLE_INDICES[@]}"; do
	        # Use specific columns for clarity and consistency
	        idx_info=$(execute_curl GET "/_cat/indices/${idx_name}?h=index,docs.count,store.size,creation.date.string&bytes=b")

	        # Split the first response line into its four columns.
	        read -r summary_index_name summary_docs_count summary_store_size_bytes summary_creation_date _ <<<"$idx_info"

	        # A valid row starts with the index that was asked for; an empty body
	        # or an error payload does not, and is reported as N/A instead of
	        # being printed as if it were data.
	        if [[ "$summary_index_name" == "$idx_name" ]]; then
	            summary_store_size_readable=$(convert_bytes_readable "$summary_store_size_bytes")

	            # Print to screen AND append to the file
	            printf "%-50s | %10s | %10s | %s\n" \
	                   "$summary_index_name" "$summary_docs_count" "$summary_store_size_readable" "$summary_creation_date" | tee -a "$OUTPUT_FILE"
	        else
	            printf "%-50s | %10s | %10s | %s\n" "$idx_name" "N/A" "N/A" "N/A (Error fetching details)" | tee -a "$OUTPUT_FILE"
	        fi
	    done
	    echo "" | tee -a "$OUTPUT_FILE" # Add a newline at the end of the list in the file
	    echo "Deletable index names also saved to: $OUTPUT_FILE"
	fi

	# Closing reminders, emitted with one printf (one argument per output line).
	printf '%s\n' \
	    "-------------------------------------------------------------------------------------" \
	    "IMPORTANT NOTES:" \
	    "1. Data streams with 'DS - WRITE INDEX' cannot be deleted directly." \
	    "2. Indices marked 'DELETABLE' can be deleted, but always confirm actual data needs." \
	    "3. For '.ds-' indices, configure and rely on ILM for automated deletion if possible." \
	    "4. The 'docs.count' and 'store.size' columns help identify truly empty or small indices." \
	    "5. Always MANUALLY VERIFY indices before deletion, especially those related to Elasticsearch's internal functions or Kibana. We are not responsible for data loss." \
	    "-------------------------------------------------------------------------------------"