Created
June 30, 2021 17:23
-
-
Save aruruka/3e9be112bcfb5d458a83263981fb9ca1 to your computer and use it in GitHub Desktop.
Delete something to create some disk space for Squadron hosts. Python script.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# Purpose: Free up disk space on Squadron hosts by deleting expendable data.
# Prerequisite:
# This Python script is run by "make_disk_space_squadron_hosts.sh" on every Squadron CDP cluster host.
# Before running this script, stop all services, including Cloudera Management Services,
# leaving only ZooKeeper and HDFS running, because the script uses the hdfs command to delete data on HDFS.
# Author: ShounenG
# --------------------------------------
import os
import re
import shutil
import socket
import subprocess
import sys
# Name of the machine this script is running on; it selects which clean-up branch runs.
HOST_NAME = socket.gethostname()

# Host-name patterns, matched case-insensitively anywhere in HOST_NAME.
# The (?!\d) lookahead stops "node1" from also matching "node10", "node11", ...
# (and "node2" from matching "node20", ...), which would otherwise send such
# a host down the wrong branch.
NODE1_REGEXP = re.compile(r'c3669-node1(?!\d)', re.IGNORECASE)
NODE2_REGEXP = re.compile(r'c3669-node2(?!\d)', re.IGNORECASE)

# Directories whose contents are deleted on node1 (the directories themselves are kept).
folders_to_delete_on_node1 = [
    '/var/lib/cloudera-service-monitor',
    '/var/lib/cloudera-host-monitor',
    '/opt/cloudera/parcel-repo',
]

# Directories whose contents are deleted on every host that is not node1.
folders_to_delete_other_than_node1 = [
    '/var/log/hadoop-hdfs',
    '/var/log/solr-infra',
    '/var/log/hadoop-yarn',
    '/var/log/nifi',
]
def _empty_folder(folder):
    """Delete everything inside *folder* while keeping the folder itself.

    Does nothing when *folder* is not an existing directory. Deletion is
    best-effort: an entry that cannot be removed is reported on stderr and
    the remaining entries are still processed.
    """
    if not os.path.isdir(folder):
        return
    print(folder)
    try:
        names = os.listdir(folder)
    except OSError as exc:
        sys.stderr.write("Could not list %s: %s\n" % (folder, exc))
        return
    for name in names:
        path = os.path.join(folder, name)
        # os.path.isdir() follows symlinks, but shutil.rmtree() refuses to
        # operate on one; a symlinked directory must be unlinked like a file.
        if os.path.isdir(path) and not os.path.islink(path):
            shutil.rmtree(path, True)
            continue
        try:
            os.unlink(path)
        except OSError as exc:
            sys.stderr.write("Could not delete %s: %s\n" % (path, exc))


def _run_shell(command, capture_output=True):
    """Run *command* through the shell, wait for it, and return its exit status.

    With ``capture_output`` true the command line is announced, stdout and
    stderr are collected and printed once the command finishes. Otherwise
    the child inherits this process's stdout/stderr. A non-zero exit status
    is reported on stderr in both modes.
    """
    if capture_output:
        print("Executing commands: " + command)
        proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
        (output, err) = proc.communicate()
        print(output, err)
    else:
        proc = subprocess.Popen(command, shell=True)
        # Wait so the script does not exit while the command is still running.
        proc.wait()
    if proc.returncode != 0:
        sys.stderr.write("Command exited with status %d: %s\n" % (proc.returncode, command))
    return proc.returncode


def main():
    """Free disk space on the current host according to its role.

    * Host matching NODE1_REGEXP: empty every folder listed in
      ``folders_to_delete_on_node1``, then run ``setfacl`` on
      /opt/cloudera/parcel-repo.
    * Any other host: empty every folder listed in
      ``folders_to_delete_other_than_node1``. If the host also matches
      NODE2_REGEXP, run ``kinit`` with the newest NameNode keytab and then
      delete /user/spark/applicationHistory/* on HDFS, bypassing the trash.

    Every step is best-effort; failures are reported but do not abort the run.
    """
    if NODE1_REGEXP.search(HOST_NAME):
        for folder in folders_to_delete_on_node1:
            _empty_folder(folder)
        # NOTE(review): the original indentation was lost; setfacl is assumed
        # to run once after the loop rather than once per folder - confirm.
        _run_shell("setfacl -R -m u:cloudera-scm:r-x /opt/cloudera/parcel-repo", capture_output=False)
        return

    for folder in folders_to_delete_other_than_node1:
        _empty_folder(folder)

    if NODE2_REGEXP.search(HOST_NAME):
        # The HDFS delete is attempted even when kinit fails, as before.
        _run_shell("kinit -kt $(ls -1 /var/run/cloudera-scm-agent/process/[0-9]*-hdfs-NAMENODE-*/hdfs.keytab|tail -n 1) hdfs/`hostname -f`")
        _run_shell("hdfs dfs -rm -r -skipTrash /user/spark/applicationHistory/*")
# Run the clean-up only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment