Last active
May 20, 2025 23:20
-
-
Save linuxmalaysia/8d0e1997a3e4c24c0777062d7d91a345 to your computer and use it in GitHub Desktop.
This script is a robust Elasticsearch index assessment tool designed to help you identify and manage old or inactive indices for .ds-logs and .ds-metrics. It connects securely to your Elasticsearch cluster, lists indices for a specified month and year, and determines their deletion status (e.g., whether they are a write index for a data stream).…
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# By Harisfazillah Jamel with Google Gemini Help
# 20250521
#
# This script is a robust Elasticsearch index assessment tool designed
# to help you identify and manage old or inactive indices for .ds-logs and .ds-metrics.
#
# Setup section: defines the connection settings, prints the disclaimer,
# prompts for credentials and the target month, and verifies prerequisites.
# Globals written here and read by the rest of the script:
#   ES_HOST, CERT_PATH, ES_USER, ES_PASS, TARGET_MONTH_YEAR, OUTPUT_FILE

# Configuration - Adjust ES_HOST if necessary
ES_HOST="localhost:9200"
CERT_PATH="/etc/elasticsearch/certs/http_ca.crt" # Path to your CA certificate

# --- Check required external tools ---
# The script calls curl, jq and awk further down. Fail fast, before asking for
# credentials, instead of misclassifying indices because a tool is missing.
for required_tool in curl jq awk; do
  if ! command -v "$required_tool" >/dev/null 2>&1; then
    echo "Error: required command '$required_tool' was not found in PATH." >&2
    exit 1
  fi
done

# --- WARNING AND DISCLAIMER ---
# Quoted heredoc delimiter: the text is printed literally, no expansion.
cat <<'EOF'
**************************************************************************************************
WARNING: This script identifies indices that *may* be deletable based on common criteria.
 However, it's crucial that you **MANUALLY VERIFY** the purpose and contents of
 each listed index before proceeding with any deletion. Indices related to
 Elasticsearch's internal functions, Kibana, or specific applications might
 be flagged as 'DELETABLE' but are essential for your cluster's operation.

 **PROCEED WITH EXTREME CAUTION.**
 We are not responsible for any data loss or operational issues resulting
 from the deletion of indices identified by this script.
**************************************************************************************************

EOF

# --- Prompt for Username and Password ---
# -r keeps backslashes in the input literal (important for passwords).
read -r -p "Enter Elasticsearch username: " ES_USER
read -r -s -p "Enter Elasticsearch password: " ES_PASS
echo # Add a newline after password input

# --- User Input for Month/Year ---
read -r -p "Enter the year and month to check (e.g., 2025.01): " TARGET_MONTH_YEAR
if [[ -z "$TARGET_MONTH_YEAR" ]]; then
  echo "Year and month cannot be empty. Exiting." >&2
  exit 1
fi

# The value is later used to select indices and to build the output file name,
# so only the documented YYYY.MM form is accepted (no regex metacharacters
# beyond the literal dot, no path separators).
month_year_re='^[0-9]{4}\.(0[1-9]|1[0-2])$'
if [[ ! "$TARGET_MONTH_YEAR" =~ $month_year_re ]]; then
  echo "Invalid value '${TARGET_MONTH_YEAR}'. Expected format YYYY.MM (e.g., 2025.01). Exiting." >&2
  exit 1
fi

# --- Define Output File ---
# Set only after TARGET_MONTH_YEAR has been read and validated.
OUTPUT_FILE="deletable_indices_${TARGET_MONTH_YEAR}.txt"

# --- Check if CA certificate exists ---
if [[ ! -f "$CERT_PATH" ]]; then
  echo "Error: CA certificate not found at '$CERT_PATH'." >&2
  echo "Please ensure the path is correct and the file exists." >&2
  exit 1
fi
#######################################
# Execute an authenticated HTTPS request against Elasticsearch with curl.
# Globals:
#   CERT_PATH (read) - CA certificate passed to curl --cacert
#   ES_USER, ES_PASS (read) - basic-auth credentials
#   ES_HOST (read) - host:port the URL is built from
# Arguments:
#   $1 - HTTP method (e.g. GET, POST)
#   $2 - endpoint path appended to https://${ES_HOST}
#   $3 - optional request body; when non-empty it is sent with
#        Content-Type: application/json
# Outputs:
#   Response body on stdout; curl transport errors on stderr.
# Returns:
#   curl's exit status.
#######################################
execute_curl() {
  local method="$1"
  local endpoint="$2"
  local data="${3:-}" # Optional data for POST/PUT requests

  # Build the argument list once instead of duplicating the whole command.
  # -S makes curl report transport errors even though -s silences progress.
  local -a curl_args=(-sS --cacert "${CERT_PATH}" -X "${method}" --config -)
  if [[ -n "$data" ]]; then
    curl_args+=(-H 'Content-Type: application/json' -d "${data}")
  fi

  # Credentials are handed to curl through a config line on stdin rather than
  # `-u user:pass` on the command line, so the password does not appear in
  # the process list. Inside a double-quoted curl config value, backslash and
  # double quote must be backslash-escaped.
  local credentials="${ES_USER}:${ES_PASS}"
  credentials=${credentials//\\/\\\\}
  credentials=${credentials//\"/\\\"}

  curl "${curl_args[@]}" "https://${ES_HOST}${endpoint}" <<<"user = \"${credentials}\""
}
#######################################
# Convert a byte count to a human-readable size string.
# Arguments:
#   $1 - non-negative number of bytes (digits, optionally with a decimal part)
# Outputs:
#   Writes one of "<x.y>GB", "<x.y>MB", "<x.y>KB" (one decimal place, binary
#   units: 1 KB = 1024 bytes) or "<bytes>b" for values below 1024.
#   Writes "N/A" when the argument is empty or not a plain number, instead of
#   emitting a bare unit suffix.
# Returns:
#   awk's exit status for numeric input; 0 for the "N/A" case.
#######################################
convert_bytes_readable() {
  local bytes="$1"
  local numeric_re='^[0-9]+([.][0-9]+)?$'

  if [[ ! "$bytes" =~ $numeric_re ]]; then
    echo "N/A"
    return 0
  fi

  # A single awk process does both the comparison and the formatting.
  # LC_ALL=C keeps "." as the decimal separator regardless of user locale.
  LC_ALL=C awk -v b="$bytes" 'BEGIN {
    n = b + 0
    if (n >= 1073741824) {        # 1 GB
      printf "%.1fGB", n / 1073741824
    } else if (n >= 1048576) {    # 1 MB
      printf "%.1fMB", n / 1048576
    } else if (n >= 1024) {       # 1 KB
      printf "%.1fKB", n / 1024
    } else {
      printf "%sb\n", b
    }
  }'
}
# ---------------------------------------------------------------------------
# Main flow: list indices, classify each .ds-logs-/.ds-metrics- backing index
# for the requested month, print a per-index report, then print and save a
# summary of the indices that are not their data stream's write index.
# Reads globals TARGET_MONTH_YEAR and OUTPUT_FILE; calls execute_curl and
# convert_bytes_readable defined earlier in this script.
# ---------------------------------------------------------------------------
echo ""
echo "--- Starting detailed index check for ${TARGET_MONTH_YEAR} ---"
echo "Processing Format: Index Name | Docs Count | Store Size | Creation Date [STATUS]"
echo "------------------------------------------------------------------------------------------------------"

# Names of deletable indices, plus the pre-formatted summary row for each one
# (kept in step so the summary needs no second round of API calls).
DELETABLE_INDICES=()
DELETABLE_ROWS=()

# Backing index naming pattern: .ds-<data_stream_name>-YYYY.MM.DD-GEN
# (GEN is a six-digit generation number). Capture group 1 is the stream name.
BACKING_INDEX_RE='^\.ds-(.+)-[0-9]{4}\.[0-9]{2}\.[0-9]{2}-[0-9]{6}$'

# Get ALL indices; sizes are requested in plain bytes so they can be converted.
if ! all_indices_raw=$(execute_curl GET "/_cat/indices?h=index,docs.count,store.size,creation.date.string&s=index&bytes=b"); then
  echo "Error: failed to query the index list from Elasticsearch (curl error)." >&2
  exit 1
fi
# The _cat API answers with plain text; a body starting with "{" is a JSON
# error document (for example an authentication failure), not an index list.
if [[ "$all_indices_raw" == "{"* ]]; then
  echo "Error: Elasticsearch returned an error instead of an index list:" >&2
  printf '%s\n' "$all_indices_raw" >&2
  exit 1
fi

# Keep only .ds-logs-/.ds-metrics- indices whose date segment (the second to
# last dash-separated field of the NAME) starts with the requested value.
# The value is compared literally via index(), never as a regex, and only
# against the index name, so it cannot match the count/size/date columns.
indices_output=$(
  printf '%s\n' "$all_indices_raw" \
    | TARGET_YM="$TARGET_MONTH_YEAR" awk '
        $1 ~ /^\.ds-(logs|metrics)-/ {
          n = split($1, parts, "-")
          if (n >= 3 && index(parts[n - 1], ENVIRON["TARGET_YM"]) == 1) print
        }'
)

if [[ -z "$indices_output" ]]; then
  echo "No indices found matching '${TARGET_MONTH_YEAR}' with '.ds-logs-' or '.ds-metrics-' prefix."
  echo "------------------------------------------------------------------------------------------------------"
else
  # The list is sorted by index name, so backing indices of one data stream
  # are adjacent; remembering the last lookup avoids one API call per index.
  cached_ds_name=""
  cached_write_index=""

  # The loop reads from a here-string (not a pipe) so that array changes made
  # inside it are preserved.
  while read -r index_name docs_count store_size_bytes creation_date; do
    [[ -n "$index_name" ]] || continue

    # Convert bytes to a more readable format (KB, MB, GB)
    store_size_readable=$(convert_bytes_readable "$store_size_bytes")
    status_message="UNKNOWN"
    is_deletable=0

    if [[ "$index_name" =~ $BACKING_INDEX_RE ]]; then
      data_stream_name_parsed="${BASH_REMATCH[1]}"

      if [[ "$data_stream_name_parsed" != "$cached_ds_name" ]]; then
        cached_ds_name="$data_stream_name_parsed"
        cached_write_index=""
        if data_stream_info=$(execute_curl GET "/_data_stream/${data_stream_name_parsed}"); then
          # The last entry of .indices is taken as the current write index.
          # jq -e exits non-zero when the path is missing/null (error document,
          # empty or invalid body); stderr is silenced on purpose because the
          # failure is reported through status_message below.
          cached_write_index=$(printf '%s' "$data_stream_info" \
            | jq -er '.data_streams[0].indices[-1].index_name' 2>/dev/null) \
            || cached_write_index=""
        fi
      fi

      if [[ -z "$cached_write_index" ]]; then
        # Fail closed: without a known write index nothing is marked deletable.
        status_message="DS CHECK FAILED (API Error)"
      elif [[ "$index_name" == "$cached_write_index" ]]; then
        status_message="DS - WRITE INDEX (CANNOT DELETE)"
      else
        status_message="DS - NOT WRITE INDEX (DELETABLE)"
        is_deletable=1
      fi
    else
      # Name does not follow the backing-index pattern, so the owning data
      # stream cannot be determined; do not suggest deletion.
      status_message="DS - PARSING FAILED (NOT MARKED DELETABLE)"
    fi

    printf "%-70s | %10s | %10s | %s [%s]\n" \
      "$index_name" "$docs_count" "$store_size_readable" "$creation_date" "$status_message"

    if (( is_deletable )); then
      DELETABLE_INDICES+=("$index_name")
      DELETABLE_ROWS+=("$(printf "%-50s | %10s | %10s | %s" \
        "$index_name" "$docs_count" "$store_size_readable" "$creation_date")")
    fi
  done <<<"$indices_output"
fi

echo "------------------------------------------------------------------------------------------------------"
echo ""
echo "--- SUMMARY: Indices identified as potentially DELETABLE for ${TARGET_MONTH_YEAR} ---"
echo "(These are indices that are NOT data stream write indices)"
echo "-------------------------------------------------------------------------------------"
if [[ ${#DELETABLE_INDICES[@]} -eq 0 ]]; then
  # Overwrite (not append) so a list from an earlier run cannot linger in the file.
  echo "No potentially deletable indices found for ${TARGET_MONTH_YEAR}." | tee "$OUTPUT_FILE"
else
  # Output header to screen and file (first write truncates the file)
  echo "Index Name | Docs Count | Store Size | Creation Date" | tee "$OUTPUT_FILE"
  echo "-------------------------------------------------------------------------------------" | tee -a "$OUTPUT_FILE"
  # Print the rows collected during the check to screen AND append to the file
  printf '%s\n' "${DELETABLE_ROWS[@]}" | tee -a "$OUTPUT_FILE"
  echo "" | tee -a "$OUTPUT_FILE" # Add a newline at the end of the list in the file
  echo "Deletable index names also saved to: $OUTPUT_FILE"
fi
echo "-------------------------------------------------------------------------------------"
echo "IMPORTANT NOTES:"
echo "1. Data streams with 'DS - WRITE INDEX' cannot be deleted directly."
echo "2. Indices marked 'DELETABLE' can be deleted, but always confirm actual data needs."
echo "3. For '.ds-' indices, configure and rely on ILM for automated deletion if possible."
echo "4. The 'docs.count' and 'store.size' columns help identify truly empty or small indices."
echo "5. Always **MANUALLY VERIFY** indices before deletion, especially those related to Elasticsearch's internal functions or Kibana. We are not responsible for data loss."
echo "-------------------------------------------------------------------------------------"
Option 2: More Detailed (Recommended)
Elasticsearch Deletable Index Checker (for .ds-logs & .ds-metrics)
This Bash script (`check_deletable_verbose_robust.sh`) helps identify Elasticsearch indices that are candidates for deletion, with a specific focus on older backing indices of `.ds-logs-` and `.ds-metrics-` data streams. It's designed to be a pre-deletion auditing tool, providing detailed information and generating an input file for a companion deletion script.
Features:
- Interactive Credential Input: Prompts for Elasticsearch username and password (hidden).
- Targeted Month/Year: Allows specifying a specific year and month to check for old indices.
- Data Stream Awareness: Identifies if a `.ds-` index is a write index (cannot be deleted directly) or an older, potentially deletable backing index.
- Specific Filtering: Only lists indices belonging to `.ds-logs-` and `.ds-metrics-` data streams.
- Detailed Output: Displays index name, docs count, store size (human-readable), creation date, and deletion status (e.g., `DELETABLE`, `WRITE INDEX`).
- Output File Generation: Creates a `deletable_indices_YYYY.MM.txt` file containing a clean list of truly deletable index names, suitable for input into a deletion script.
- Clear Warnings: Includes prominent warnings about manual verification and irreversible actions.
Usage:
- Run `./check_deletable_verbose_robust.sh`.
- Provide your Elasticsearch credentials and the target month/year.
- Review the generated output on the console and the `deletable_indices_YYYY.MM.txt` file.
- (Optional) Use the generated file with a companion deletion script, such as `delete_elasticsearch_indices.sh`.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
WARNING: This script identifies indices that may be deletable based on common criteria. However, it's crucial that you MANUALLY VERIFY the purpose and contents of each listed index before proceeding with any deletion. Indices related to Elasticsearch's internal functions, Kibana, or specific applications might be flagged as 'DELETABLE' but are essential for your cluster's operation.
**PROCEED WITH EXTREME CAUTION.** We are not responsible for any data loss or operational issues resulting from the deletion of indices identified by this script.