radupotop · November 25, 2024 21:07
diff --git a/pandas-demo.py b/pandas-demo.py
 """Tour of everyday pandas DataFrame operations on a small employee table.

 Key concepts covered:
 1. Creating DataFrames
 2. Basic operations and information
 3. Accessing data (columns and rows)
 4. Data manipulation (adding columns, sorting, handling missing values)
 5. Grouping and aggregation
 6. Data transformation
 7. Merging DataFrames
 8. Exporting data

 Common useful methods:
 - `head()`: View first few rows
 - `info()`: Get DataFrame information
 - `describe()`: Statistical summary
 - `loc[]`: Access by label
 - `iloc[]`: Access by position
 - `groupby()`: Group data
 - `merge()`: Combine DataFrames
 - `sort_values()`: Sort data
 - `dropna()`: Remove missing values
 - `fillna()`: Fill missing values

 This covers the basics of pandas. There are many more features and functions
 available depending on your specific needs.
 """
 import pandas as pd

 # 1. Creating DataFrames
 # Sample data: one list per column, all lists the same length (one entry per row)
 data = {
     'Name': ['John', 'Emma', 'Alex', 'Sarah', 'Mike'],
     'Age': [28, 24, 32, 27, 30],
     'City': ['New York', 'London', 'Paris', 'Tokyo', 'Berlin'],
     'Salary': [75000, 65000, 85000, 70000, 80000],
     'Department': ['IT', 'HR', 'Finance', 'Marketing', 'IT'],
     'Date_Joined': ['2020-01-15', '2019-08-22', '2021-03-10', '2020-11-30', '2018-05-14'],
 }

 # Create DataFrame
 df = pd.DataFrame(data)

 # Convert Date_Joined from strings to datetime values
 df['Date_Joined'] = pd.to_datetime(df['Date_Joined'])

 print(df)

 # Basic statistics
 print(df.describe())

 # Filter data: keep rows whose salary is above 70000
 print(df[df['Salary'] > 70000])

 # Group by department and average the salaries in each group
 print(df.groupby('Department')['Salary'].mean())

 # Sort values, oldest first
 print(df.sort_values('Age', ascending=False))

 ##################################

 # From a CSV file (illustrative only, not executed)
 # df = pd.read_csv('file.csv')

 # 2. Basic Operations
 # View first few rows
 print(df.head())

 # Get basic information about DataFrame.
 # info() writes its report to stdout itself and returns None, so it is called
 # directly; wrapping it in print() would emit a stray "None" line.
 df.info()

 # Get statistical summary
 print(df.describe())

 # 3. Accessing Data
 # Select column
 print(df['Name'])

 # Select multiple columns
 print(df[['Name', 'Age']])

 # Select a row by index label
 print(df.loc[0])

 # Select rows by condition
 print(df[df['Age'] > 25])

 # 4. Data Manipulation
 # Add new column; values follow the row order of 'City'
 # (New York, London, Paris, Tokyo, Berlin)
 df['Country'] = ['USA', 'UK', 'France', 'Japan', 'Germany']

 # Sort values (returns a new DataFrame, df itself is left unsorted)
 df_sorted = df.sort_values('Age')

 # Drop rows that contain missing values (the sample data has none)
 df_clean = df.dropna()

 # Fill missing values with 0
 df_filled = df.fillna(0)

 # 5. Grouping and Aggregation
 # Group by column
 grouped = df.groupby('City')

 # Calculate mean for each group
 print(grouped['Age'].mean())

 # 6. Data Transformation
 # Derive a column with vectorized arithmetic instead of a per-element apply()
 df['Age_Doubled'] = df['Age'] * 2

 # 7. Merging DataFrames
 df2 = pd.DataFrame(
     {'City': ['New York', 'Paris', 'London'], 'Population': [8400000, 2200000, 8900000]}
 )

 # Merge on 'City'. The default inner join keeps only cities present in both
 # frames, so the Tokyo and Berlin rows are absent from the result.
 merged_df = pd.merge(df, df2, on='City')

 # 8. Export Data (illustrative only, not executed)
 # To CSV
 # df.to_csv('output.csv', index=False)

 # To Excel
 # df.to_excel('output.xlsx', index=False)
	"""Tour of everyday pandas DataFrame operations on a small employee table.

	Key concepts covered:
	1. Creating DataFrames
	2. Basic operations and information
	3. Accessing data (columns and rows)
	4. Data manipulation (adding columns, sorting, handling missing values)
	5. Grouping and aggregation
	6. Data transformation
	7. Merging DataFrames
	8. Exporting data

	Common useful methods:
	- `head()`: View first few rows
	- `info()`: Get DataFrame information
	- `describe()`: Statistical summary
	- `loc[]`: Access by label
	- `iloc[]`: Access by position
	- `groupby()`: Group data
	- `merge()`: Combine DataFrames
	- `sort_values()`: Sort data
	- `dropna()`: Remove missing values
	- `fillna()`: Fill missing values

	This covers the basics of pandas. There are many more features and functions
	available depending on your specific needs.
	"""
	import pandas as pd

	# 1. Creating DataFrames
	# Sample data: one list per column, all lists the same length (one entry per row)
	data = {
		'Name': ['John', 'Emma', 'Alex', 'Sarah', 'Mike'],
		'Age': [28, 24, 32, 27, 30],
		'City': ['New York', 'London', 'Paris', 'Tokyo', 'Berlin'],
		'Salary': [75000, 65000, 85000, 70000, 80000],
		'Department': ['IT', 'HR', 'Finance', 'Marketing', 'IT'],
		'Date_Joined': ['2020-01-15', '2019-08-22', '2021-03-10', '2020-11-30', '2018-05-14'],
	}

	# Create DataFrame
	df = pd.DataFrame(data)

	# Convert Date_Joined from strings to datetime values
	df['Date_Joined'] = pd.to_datetime(df['Date_Joined'])

	print(df)

	# Basic statistics
	print(df.describe())

	# Filter data: keep rows whose salary is above 70000
	print(df[df['Salary'] > 70000])

	# Group by department and average the salaries in each group
	print(df.groupby('Department')['Salary'].mean())

	# Sort values, oldest first
	print(df.sort_values('Age', ascending=False))

	##################################

	# From a CSV file (illustrative only, not executed)
	# df = pd.read_csv('file.csv')

	# 2. Basic Operations
	# View first few rows
	print(df.head())

	# Get basic information about DataFrame.
	# info() writes its report to stdout itself and returns None, so it is called
	# directly; wrapping it in print() would emit a stray "None" line.
	df.info()

	# Get statistical summary
	print(df.describe())

	# 3. Accessing Data
	# Select column
	print(df['Name'])

	# Select multiple columns
	print(df[['Name', 'Age']])

	# Select a row by index label
	print(df.loc[0])

	# Select rows by condition
	print(df[df['Age'] > 25])

	# 4. Data Manipulation
	# Add new column; values follow the row order of 'City'
	# (New York, London, Paris, Tokyo, Berlin)
	df['Country'] = ['USA', 'UK', 'France', 'Japan', 'Germany']

	# Sort values (returns a new DataFrame, df itself is left unsorted)
	df_sorted = df.sort_values('Age')

	# Drop rows that contain missing values (the sample data has none)
	df_clean = df.dropna()

	# Fill missing values with 0
	df_filled = df.fillna(0)

	# 5. Grouping and Aggregation
	# Group by column
	grouped = df.groupby('City')

	# Calculate mean for each group
	print(grouped['Age'].mean())

	# 6. Data Transformation
	# Derive a column with vectorized arithmetic instead of a per-element apply()
	df['Age_Doubled'] = df['Age'] * 2

	# 7. Merging DataFrames
	df2 = pd.DataFrame(
		{'City': ['New York', 'Paris', 'London'], 'Population': [8400000, 2200000, 8900000]}
	)

	# Merge on 'City'. The default inner join keeps only cities present in both
	# frames, so the Tokyo and Berlin rows are absent from the result.
	merged_df = pd.merge(df, df2, on='City')

	# 8. Export Data (illustrative only, not executed)
	# To CSV
	# df.to_csv('output.csv', index=False)

	# To Excel
	# df.to_excel('output.xlsx', index=False)