Skip to content

Instantly share code, notes, and snippets.

@aruruka
Created June 30, 2021 17:23
Show Gist options
  • Save aruruka/3e9be112bcfb5d458a83263981fb9ca1 to your computer and use it in GitHub Desktop.
Save aruruka/3e9be112bcfb5d458a83263981fb9ca1 to your computer and use it in GitHub Desktop.
Delete something to create some disk space for Squadron hosts. Python script.
#!/usr/bin/env python
# Purpose: Free disk space on Squadron hosts by emptying selected Cloudera and log directories.
# Prerequisite:
# This Python script is run by "make_disk_space_squadron_hosts.sh" on every Squadron CDP cluster host.
# Before running this script, stop all services, including Cloudera Management Services,
# but leave ZooKeeper and HDFS running, because the script uses the hdfs command to delete files on HDFS.
# Author: ShounenG
# --------------------------------------
import os, subprocess, socket, re, shutil
# Name of the local host as reported by the OS; main() matches it against the
# node patterns below to decide which cleanup to perform.
HOST_NAME = socket.gethostname()

# Case-insensitive substring patterns identifying the first and second nodes.
# NOTE(review): these are unanchored, so 'c3669-node1' would also match a host
# named e.g. 'c3669-node10' -- confirm no such host exists.
NODE1_REGEXP = re.compile(r'c3669-node1', flags=re.I)
NODE2_REGEXP = re.compile(r'c3669-node2', flags=re.I)

# Directories whose contents are removed on the node matching NODE1_REGEXP.
folders_to_delete_on_node1 = [
    '/var/lib/cloudera-service-monitor',
    '/var/lib/cloudera-host-monitor',
    '/opt/cloudera/parcel-repo',
]

# Directories whose contents are removed on every other node.
folders_to_delete_other_than_node1 = [
    '/var/log/hadoop-hdfs',
    '/var/log/solr-infra',
    '/var/log/hadoop-yarn',
    '/var/log/nifi',
]
def _empty_folder(folder):
    """Delete everything inside *folder* while keeping the folder itself."""
    # A missing path (or one that is not a directory) is skipped silently.
    if not os.path.isdir(folder):
        return
    print(folder)
    for entry in os.listdir(folder):
        path = os.path.join(folder, entry)
        try:
            # shutil.rmtree refuses symlinks, so a link to a directory is
            # unlinked like a file instead of being silently left behind.
            if os.path.isdir(path) and not os.path.islink(path):
                shutil.rmtree(path, True)
            else:
                os.unlink(path)
        except OSError as exc:
            # Best effort: one undeletable entry must not stop the cleanup.
            print("Could not remove %s: %s" % (path, exc))


def _run_shell(command):
    """Run *command* through the shell, echo its output, return its exit status."""
    # shell=True is required: the commands rely on globs, $(...) and backticks.
    # They are fixed strings from this script, never external input.
    print("Executing commands: " + command)
    proc = subprocess.Popen(command, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, shell=True)
    (output, err) = proc.communicate()
    print(output, err)
    if proc.returncode != 0:
        print("Command exited with status %d" % proc.returncode)
    return proc.returncode


def main():
    """Free disk space on this host according to its role.

    On the host matching NODE1_REGEXP, the contents of every directory in
    folders_to_delete_on_node1 are removed and the cloudera-scm user is then
    granted r-x on /opt/cloudera/parcel-repo via setfacl.

    On any other host, the contents of every directory in
    folders_to_delete_other_than_node1 are removed. If the host also matches
    NODE2_REGEXP, a Kerberos ticket is obtained from the newest NAMENODE
    keytab and the Spark application history is deleted from HDFS.

    NOTE(review): the nesting was reconstructed from a copy of the script
    that had lost its indentation; confirm that setfacl is meant to run once
    after the node1 loop and that the node2 step belongs to the non-node1
    branch.
    """
    if NODE1_REGEXP.search(HOST_NAME):
        for folder in folders_to_delete_on_node1:
            _empty_folder(folder)
        # Wait for setfacl and report its status instead of fire-and-forget.
        _run_shell("setfacl -R -m u:cloudera-scm:r-x /opt/cloudera/parcel-repo")
        return

    for folder in folders_to_delete_other_than_node1:
        _empty_folder(folder)

    if NODE2_REGEXP.search(HOST_NAME):
        # The HDFS delete is attempted even if kinit fails, as before.
        _run_shell("kinit -kt $(ls -1 /var/run/cloudera-scm-agent/process/[0-9]*-hdfs-NAMENODE-*/hdfs.keytab|tail -n 1) hdfs/`hostname -f`")
        _run_shell("hdfs dfs -rm -r -skipTrash /user/spark/applicationHistory/*")
# Run the cleanup only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment