def hello(n):
    """Print the stored lab-solution snippet selected by ``n``.

    Ten pandas / matplotlib / seaborn code snippets are kept as string
    literals, keyed 1 through 10.  The chosen snippet is written to
    standard output verbatim so that it can be copied into a notebook.

    The snippets are stored as *raw* strings: they contain backslash
    escapes inside their own string literals, and those must be printed
    as written rather than being interpreted when this module is loaded.

    Several snippets call ``pd.read_csv()`` (and similar loaders) with no
    argument; a path has to be filled in before such a snippet can run.

    Args:
        n: Snippet number, an integer from 1 to 10.

    Returns:
        None.  The snippet, or an "Invalid option" message when ``n`` is
        not a known key, is printed.
    """
    question1 = r"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import zscore

# Load dataset
data = pd.read_csv(r"/content/drive/MyDrive/Dataset.csv - Sheet1.csv")
df = pd.DataFrame(data)

# ------------------------------
# Q1. Identify & handle missing values
# ------------------------------
print("Missing values per column:\n", df.isnull().sum())
print("\nNon-missing values per column:\n", df.notnull().sum())

# Fill numeric missing values (Age, Year_emp)
df['Age'] = df['Age'].fillna(df['Age'].mean())
df['Year_emp'] = df['Year_emp'].fillna(df['Year_emp'].mean())

# Fill categorical missing values (Occupation) with mode
df['Occupation'] = df['Occupation'].fillna(df['Occupation'].mode()[0])

# Fill satisfaction_level missing with mean
df['satisfaction_level'] = df['satisfaction_level'].fillna(df['satisfaction_level'].mean())

# Fill Income missing value with mean
df['Income'] = df['Income'].fillna(df['Income'].mean())

print("\nAfter filling missing values:\n", df.isnull().sum())

# Impact: print value counts before vs after (for Occupation and satisfaction)
print("\nOccupation distribution after fill:\n", df['Occupation'].value_counts())
print("\nSatisfaction level mean after fill:", df['satisfaction_level'].mean())
display(df)

# ------------------------------
# Q2. Transform satisfaction into binary (custom function)
# ------------------------------
def satisfaction_binary(x):
    return "High" if x > 0.7 else "Low"

df['satisfaction_binary'] = df['satisfaction_level'].apply(satisfaction_binary)
print("\nSatisfaction binary distribution:\n", df['satisfaction_binary'].value_counts())

# ------------------------------
# Q3. Convert purchase_history with map()
# ------------------------------
mapping = {"High": 2, "Medium": 1, "Low": 0}
df['purchase_history_num'] = df['purchase_history'].map(mapping)
print("\nPurchase history numeric mapping:\n", df[['purchase_history','purchase_history_num']])

# ------------------------------
# Q4. Identify outliers in Income
# ------------------------------
# Z-score method
df['income_zscore'] = zscore(df['Income'])
outliers_z = df[df['income_zscore'].abs() > 3]
print("\nOutliers detected by Z-score:\n", outliers_z[['ID','Income','income_zscore']])

# IQR method
Q1 = df['Income'].quantile(0.25)
Q3 = df['Income'].quantile(0.75)
IQR = Q3 - Q1
outliers_iqr = df[(df['Income'] < (Q1 - 1.5*IQR)) | (df['Income'] > (Q3 + 1.5*IQR))]
print("\nOutliers detected by IQR:\n", outliers_iqr[['ID','Income']])

# ------------------------------
# Q5. Handle Year_emp with mean vs median imputation
# ------------------------------
data = pd.read_csv(r"/content/drive/MyDrive/Dataset.csv - Sheet1.csv")

df_mean = data.copy()
df_median = data.copy()

df_mean['Year_emp'] = df_mean['Year_emp'].fillna(df_mean['Year_emp'].mean())
df_median['Year_emp'] = df_median['Year_emp'].fillna(df_median['Year_emp'].median())

print("\nOriginal missing in Year_emp:", data['Year_emp'].isnull().sum())
print("After mean imputation missing:", df_mean['Year_emp'].isnull().sum())
print("After median imputation missing:", df_median['Year_emp'].isnull().sum())

print("\nYear_emp mean after mean imputation:", df_mean['Year_emp'].mean())
print("Year_emp mean after median imputation:", df_median['Year_emp'].mean())

# Final dataset preview
print("\nFinal transformed dataset:\n", df.head())



---------------------------------------------------------------------------------------------------------
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

data = pd.read_csv(r"LAB-1.csv")
display(data)
print("Missing values per column:")
print(data.isnull().sum())
print("Values present per column:")
print(data.notnull().sum())
print("Filling missing values:")
df=pd.DataFrame(data)
df['Age'] = df['Age'].fillna(df['Age'].mean())
display(df)
print(df.notnull().sum())
data=pd.read_csv(r"LAB-1.csv")
display(data)
print("\nDropping missing values:")
df=pd.DataFrame(data)
df.dropna(inplace=True)
display(df)

print("Total Revenue by Region: ")
total_revenue = df['revenue'].sum()
print(f'{total_revenue}')
revenue_by_region = df.groupby('region')['revenue'].sum()
print(f'\n{revenue_by_region}\n')
percentage_by_region = (revenue_by_region / total_revenue * 100).round(2)
print(percentage_by_region )

"""

    question2 = r"""
import pandas as pd
import numpy as np

data=pd.read_csv()
df=pd.DataFrame(data)
print("Original Dataset")
display(df)
print()

#Q1
def fillin():
    df['Age'] = df['Age'].fillna(df['Age'].mean())
    df['City'] = df['City'].fillna('Unknown')
fillin()
print("\n\nDataset after filling missing values")
display(df)
print()

#Q2
def rem_dup():
    df.drop_duplicates(subset=['Age','City','Gender'],inplace=True)
    print("\n\nDataset after dropping duplicate values")
    display(df)
rem_dup()
print()

#Q3
gender_map={'M':'Male','F':'Female'}
def replace_inconsistent():
    df['Gender'] = df['Gender'].replace(gender_map)
replace_inconsistent()
print("\n\nDataset after removing inconsistent values")
display(df)
print()

#Q4
bins=[18,30,40,50]
labels=['18-30','30-40','40-50']
df['Age_range']=pd.cut(df['Age'],bins=bins,labels=labels,right=True,include_lowest=True)
print('\n\n Dataset after arranging in Age_range')
display(df[['Age','Age_range']])
print('\n\nWhole Dataset')
display(df)

def city_dummies(data):
    city_dummies=pd.get_dummies(df['City'],prefix='City')
    df_with_city_dummies=pd.concat([data,city_dummies],axis=1)
    print('\n\nDataset with dummie values')
    display(df_with_city_dummies)
city_dummies(df)
"""

    question3 = r"""
import pandas as pd
import numpy as np
Sales_data=pd.read_csv()
sdf=pd.DataFrame(Sales_data)
print("Original Sales Dataset")
display(sdf)
print()

Customer_data=pd.read_csv()
cdf=pd.DataFrame(Customer_data)
print("Original Customer Dataset")
display(cdf)
print()


#Q1
df_multiindex = sdf.set_index(['Product', 'Month'])
display(df_multiindex)

val=sdf.groupby('Month')['Sales'].mean()
display(val)

#Q2
# Inner merge on 'ID'
merged_inner = pd.merge(sdf, cdf, on='Order_id', how='inner')
print("Inner Merge:\n")
display(merged_inner)

# Left merge on 'ID'
merged_left = pd.merge(sdf, cdf, on='Order_id', how='left')
print("\nLeft Merge:\n")
display(merged_left)

# Outer merge on 'ID'
merged_outer = pd.merge(sdf, cdf, on='Order_id', how='outer')
print("\nOuter Merge:\n")
display(merged_outer)

#Q3
res1 = pd.concat([sdf, cdf],axis=0)
print("Concat Vertical:\n")
display(res1)

res2 = pd.concat([sdf, cdf],axis=1)
print("Concat Horizontal:\n")
display(res2)

#Q4
def Combine_and_CheckMissing(df1, df2):
    comb = pd.merge(df1, df2, on='Order_id', how='outer')
    comb['Feedback_score'] = comb['Feedback_score'].fillna(comb['Feedback_score'].mean())
    return comb
    
display(Combine_and_CheckMissing(sdf,cdf))

# Q5
def pivot_sales_data(file_path):
    # Read the sales data
    df = pd.read_csv(file_path)
    
    # Pivot the table: products as rows, months as columns, sales as values
    pivot_df = df.pivot_table(index='Product', columns='Month', values='Sales', aggfunc='sum')
    
    # Reset index to make Product a column again
    pivot_df = pivot_df.reset_index()
    
    return pivot_df

# Example usage
pivoted_sales = pivot_sales_data()
print(pivoted_sales)

    
display(Combine_and_CheckMissing(sdf,cdf))
"""

    question4 = r"""
import pandas as pd
import numpy as np

# --- Initial Dataset Setup ---

data = {
    'Country': ['USA', 'USA', 'China', 'China', 'Germany', 'Germany'],
    'Year': [2020, 2021, 2020, 2021, 2020, 2021],
    'Population': [331000000, 332000000, 1402000000, 1403000000, 83000000, 83100000],
    'GDP': [21500000, 21500000, 14700000, 14800000, 4000000, 4200000],
    'Life_Expectancy': [78.5, 78.7, 76.9, 77.1, 81.3, 81.5]
}
df = pd.DataFrame(data)

print("----------- Original DataFrame -----------")
print(df)
print("\n" + "="*50 + "\n")


# --- Question 1: Compute Summary Statistics ---

print("----------- Question 1: Summary Statistics -----------")
print("--- Using Pandas Methods ---")
print(f"Population Mean: {df['Population'].mean():,.0f}")
print(f"Population Sum: {df['Population'].sum():,.0f}")
print(f"GDP Max: {df['GDP'].max():,.0f}")
print(f"GDP Min: {df['GDP'].min():,.0f}")
print("\n--- Using NumPy Functions ---")
print(f"Population Mean (NumPy): {np.mean(df['Population']):,.0f}")
print(f"Population Std Dev (NumPy): {np.std(df['Population']):,.2f}")
print(f"GDP Sum (NumPy): {np.sum(df['GDP']):,.0f}")
print("\n" + "="*50 + "\n")


# --- Question 2: Increase GDP by 10% ---

print("----------- Question 2: Increase GDP by 10% -----------")
df['GDP_Increased'] = df['GDP'] * 1.10
print("DataFrame with GDP increased by 10%:")
print(df[['Country', 'Year', 'GDP', 'GDP_Increased']])
print("\n" + "="*50 + "\n")


# --- Question 5: Create Hierarchical Index ---
# (Placed before Questions 3 and 4 because both of them use df_multi_index.)

print("----------- Question 5: Hierarchical Indexing -----------")
df_multi_index = df.set_index(['Country', 'Year'])
print("DataFrame with Multi-Index (Country, Year):")
print(df_multi_index)
print("\n" + "="*50 + "\n")


# --- Question 3: Swap Index Levels ---

print("----------- Question 3: Swap Index Levels -----------")
df_swapped = df_multi_index.swaplevel().sort_index()
print("DataFrame with swapped and sorted index (Year, Country):")
print(df_swapped)
print("\n" + "="*50 + "\n")

# --- Question 4: Pivot Data with unstack() ---

print("----------- Question 4: Unstacking to Pivot Data -----------")
df_unstacked = df_multi_index.unstack()
print("Unstacked DataFrame (Years as columns):")
print(df_unstacked[['GDP', 'Life_Expectancy']])
print("\n" + "="*50 + "\n")



"""

    question5 = r"""
import pandas as pd
import numpy as np
Sales_data=pd.read_csv()
df=pd.DataFrame(Sales_data)
print("Original Sales Dataset")
display(df)

#Q1
pivot_table = pd.pivot_table(
    Sales_data,
    values = 'Revenue',
    index = 'Date',
    columns = 'Salesperson',
    aggfunc = "sum"
)
display(pivot_table)

#Q2
avg_rev=df.groupby("Product")["Revenue"].mean()
display(avg_rev)

#Q3
max_unit_sold=df.groupby("Salesperson")["Units_Sold"].max()
display(max_unit_sold)

#Q4
revenue_by_region=df.groupby("Region")["Revenue"].sum()
Percentage_revenure_region=(revenue_by_region/revenue_by_region.sum())*100
display(Percentage_revenure_region)

#Q5
transaction_per_salesperson = df["Salesperson"].value_counts()
most_transaction_per_salesperson = transaction_per_salesperson.idxmax()
most_transaction_count = transaction_per_salesperson.max()

print(f"{most_transaction_per_salesperson} completed the most sales transactions: {most_transaction_count}")

#Q6
pivot_revenue=pd.pivot_table(
    Sales_data,
    values = ["Revenue","Units_Sold"],
    index = 'Salesperson',
    columns = 'Product',
    aggfunc = "sum",
    fill_value=0
)
display(pivot_revenue)

#Q7
pivot_unit_region=pd.pivot_table(
    Sales_data,
    values = "Units_Sold",
    index = 'Date',
    columns = 'Region',
    aggfunc = "sum",
    fill_value=0
)
display(pivot_unit_region)
"""

    question6 = r"""
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
games_df = pd.read_csv('basketball_games.csv')
players_df = pd.read_csv('basketball_players.csv')
games_df['Date'] = pd.to_datetime(games_df['Date'])

print("1. Team's Points Score Over the Season:")
plt.figure(figsize=(12, 6))
plt.plot(games_df['Date'], games_df['Team_Points'],
         marker='x', linewidth=2, markersize=8)
plt.title("Team's Points Score Over the Season", fontsize=10, fontweight='bold')
plt.xlabel('Date', fontsize=10)
plt.ylabel('Points Scored', fontsize=12)
plt.grid(True)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

trend=np.polyfit(range(len(games_df)),games_df['Team_Points'],1)[0]
trend_direction="improving" if trend>0 else "declining" if trend <0 else "stable"
print(f"Points trend : {trend_direction} (slope: {trend:.2f})")

print(f"\n2. Average Attendance: {games_df['Attendance'].mean():.0f} people")
print(f"   Highest Attendance: {games_df['Attendance'].max():.0f} people")
print(f"   Lowest Attendance: {games_df['Attendance'].min():.0f} people")

plt.figure(figsize=(10, 6))
plt.hist(games_df['Attendance'], bins=8, alpha=0.7, edgecolor='black')
plt.title('Distribution of Game Attendance', fontsize=16, fontweight='bold')
plt.xlabel('Attendance', fontsize=12)
plt.ylabel('Number of Games', fontsize=12)
plt.grid(True)
plt.tight_layout()
plt.show()

player_points = players_df.groupby('Player')['Points'].sum().sort_values(ascending=False)
top_scorer = player_points.index[0]
top_score = player_points.iloc[0]

print(f"\n3. Top Scorer: {top_scorer} with {top_score} total points")

plt.figure(figsize=(10, 6))
player_points.plot(kind='bar', color=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd'])
plt.title('Total Points Scored by Each Player', fontsize=16, fontweight='bold')
plt.xlabel('Player', fontsize=12)
plt.ylabel('Total Points', fontsize=12)
plt.grid(True)
plt.tight_layout()
plt.show()

threshold = 100
games_above_threshold = len(games_df[games_df['Team_Points'] > threshold])
print(f"\nGames scoring above {threshold} points: {games_above_threshold}/{len(games_df)}")

bins = [80, 90, 100, 110, 120]
labels = ['80-89', '90-99', '100-109', '110-119']
games_df['Point_Range'] = pd.cut(games_df['Team_Points'], bins=bins, labels=labels, right=False)
point_range_counts = games_df['Point_Range'].value_counts().sort_index()

plt.figure(figsize=(10, 6))
point_range_counts.plot(kind='bar', color='skyblue', edgecolor='black')
plt.title('Number of Games by Points Scored Range', fontsize=16, fontweight='bold')
plt.xlabel('Points Range', fontsize=12)
plt.ylabel('Number of Games', fontsize=12)
plt.grid(True,axis="y")
plt.tight_layout()
plt.show()

opponent_performance = games_df.groupby('Opponent').agg({
    'Team_Points': 'mean',
    'Result': lambda x: (x == 'Win').sum()
}).round(1)

opponent_performance.columns = ['Avg_Points_Against', 'Wins']
opponent_performance = opponent_performance.sort_values('Avg_Points_Against', ascending=False)

print(f"\nTeam Performance Against Opponents (by average points scored):")
display(opponent_performance.reset_index())

plt.figure(figsize=(12, 6))
opponent_performance['Avg_Points_Against'].sort_values().plot(kind='barh', color='Blue')
plt.title('Average Points Scored Against Each Opponent', fontsize=16, fontweight='bold')
plt.xlabel('Average Points Scored', fontsize=12)
plt.ylabel('Opponent', fontsize=12)
plt.grid(True, axis='x')
plt.tight_layout()
plt.show()

opponent_attendance = games_df.groupby('Opponent')['Attendance'].mean().sort_values(ascending=False)

print(f"\n Average Attendance by Opponent:")
for opponent, attendance in opponent_attendance.items():
    print(f"   {opponent}: {attendance:.0f} average attendance")

plt.figure(figsize=(12, 6))
opponent_attendance.sort_values().plot(kind='barh', color='orange')
plt.title('Average Attendance by Opponent', fontsize=16, fontweight='bold')
plt.xlabel('Average Attendance', fontsize=12)
plt.ylabel('Opponent', fontsize=12)
plt.grid(True, alpha=0.3, axis='x')
plt.tight_layout()
plt.show()

win_loss_stats = games_df.groupby('Result').agg({
    'Team_Points': ['count', 'mean'],
    'Game_ID': 'count'
}).round(1)

win_loss_stats.columns = ['Games_Count', 'Avg_Points', 'Total_Games']
win_loss_stats = win_loss_stats.reset_index()

print(f"\n Win-Loss Record vs Points Scored:")
for _,row in win_loss_stats.iterrows():
    print(f"   {row['Result']}: {row['Games_Count']} games, {row['Avg_Points']} avg points")

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

results = win_loss_stats['Result']
game_counts = win_loss_stats['Games_Count']
colors = ['green' if result == 'Win' else 'red' if result == 'Loss' else 'gray' for result in results]

ax1.bar(results, game_counts, color=colors, alpha=0.7, edgecolor='black')
ax1.set_title('Win-Loss-Tie Record', fontsize=14, fontweight='bold')
ax1.set_ylabel('Number of Games', fontsize=12)
ax1.grid(True, alpha=0.3, axis='y')

avg_points = win_loss_stats['Avg_Points']
ax2.bar(results, avg_points, color=colors, alpha=0.7, edgecolor='black')
ax2.set_title('Average Points by Game Result', fontsize=14, fontweight='bold')
ax2.set_ylabel('Average Points', fontsize=12)
ax2.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()
"""

    question7 = r"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

df=pd.read_csv()

df.head()

df['Date/Time'] = pd.to_datetime(df['Date/Time'])
df['date'] = df['Date/Time'].dt.date
df['month'] = df['Date/Time'].dt.to_period('M').astype(str)
df['hour'] = df['Date/Time'].dt.hour
df['Coordinates'] = list(zip(df['Lat'], df['Lon']))
df['DayOfWeek'] = df['Date/Time'].dt.day_name()
df.head()

# Heatmap of pickups by hour and weekday
heatmap_data = df.groupby(['DayOfWeek', 'hour']).size().unstack()

days_order=['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday']
heatmap_data=heatmap_data.reindex(days_order)

plt.figure(figsize=(12,6))
sns.heatmap(heatmap_data,linewidth=3,cmap="YlOrRd")
plt.title("Heatmap of Pickups (Weekday vs Hour)")
plt.xlabel("Hour of Day")
plt.ylabel("Weekday (0=Mon, 6=Sun)")
plt.show()

MONTH_FOR_TREND = df['month'].iloc[0]  # pick first available month
daily = df[df['month'] == MONTH_FOR_TREND].groupby("date").size().reset_index(name="pickups")

plt.figure(figsize=(14,5))
plt.plot(daily["date"], daily["pickups"], marker="o")
plt.title(f"Daily Uber Pickups in {MONTH_FOR_TREND}")
plt.xlabel("Date")
plt.ylabel("Number of Pickups")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


REGION_COL = df.columns[-1]  # assume last column is region
display(REGION_COL)
region_counts = df.groupby(REGION_COL).size().reset_index(name="count").sort_values("count", ascending=False).head(30)
region_counts["size"] = region_counts["count"] / region_counts["count"].max() * 3000

plt.figure(figsize=(15,8))
plt.scatter(range(len(region_counts)), region_counts["count"], s=region_counts["size"], alpha=0.6, edgecolors="k")
plt.xticks(range(len(region_counts)), region_counts[REGION_COL], rotation=45, ha="right")
plt.xlabel("Region")
plt.ylabel("Total Pickups")
plt.title("Top Regions by Uber Pickups")
plt.tight_layout()
plt.show()
"""

    question8 = r"""
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
titanic = pd.read_csv()

plt.figure(figsize=(8, 5))
sns.barplot(data=titanic, x='Pclass', y='Survived', palette='Blues_d', ci=None)
plt.title('Survival Rate by Passenger Class')
plt.xlabel('Passenger Class')
plt.ylabel('Survival Rate')
plt.xticks([0, 1, 2], ['1st Class', '2nd Class', '3rd Class'])
plt.tight_layout()
plt.show()

survival_counts = titanic['Survived'].value_counts()
labels = ['Did Not Survive', 'Survived']
colors = ['#ff9999', '#66b3ff']

plt.figure(figsize=(6, 6))
plt.pie(survival_counts, labels=labels, colors=colors, autopct='%1.1f%%', startangle=90)
plt.title('Proportion of Survivors vs. Non-Survivors')
plt.show()

grouped = titanic.groupby(['Pclass', 'Sex', 'Survived']).size().unstack(fill_value=0)

grouped.plot(kind='bar', stacked=True, color=['#ff9999', '#66b3ff'], figsize=(10, 6))
plt.title('Survivors and Non-Survivors by Class and Sex')
plt.xlabel('Passenger Class and Sex')
plt.ylabel('Count')
plt.legend(['Did Not Survive', 'Survived'], title='Outcome')
plt.tight_layout()
plt.show()

"""

    question9 = r"""
import kagglehub

# Download latest version
path = kagglehub.dataset_download()

print("Path to dataset files:", path)

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

data=pd.read_csv()
data.head()

plt.figure(figsize=(10,8))
plt.scatter(data['GrLivArea'],data['SalePrice'])
plt.xlabel("GrLivArea")
plt.ylabel("SalePrice")
plt.show()

cols = ['GrLivArea', 'OverallQual', 'TotalBsmtSF']
corr_matrix = data[cols].corr()

plt.figure(figsize=(6, 4))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')
plt.title("Correlation Heatmap")
plt.show()

plt.figure(figsize=(10,8))
sns.scatterplot(
    x='GrLivArea',
    y='SalePrice',
    data=data,
    size=data['OverallQual'],
    sizes=(50, 200),
    hue='OverallQual',
    alpha=0.5
)
plt.show()
"""

    question10 = r"""
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import squarify  # for tree map

df = pd.read_csv()


plt.figure(figsize=(10,4))
df['player_positions'].str.split(',').explode().value_counts().plot(kind='bar', color='skyblue')
plt.title("Number of Players per Position")
plt.xlabel("Position")
plt.ylabel("Count")
plt.tight_layout()
plt.show()

pos_rating = df.groupby(df['player_positions'].str.split(',').str[0])['overall'].mean()

plt.figure(figsize=(6,6))
plt.pie(pos_rating, labels=pos_rating.index, autopct='%1.1f%%', wedgeprops={'width':0.4})
plt.title("Average Overall Rating by Position (Donut Chart)")
plt.show()

club_group = df.groupby('club_name')['overall'].mean().sort_values(ascending=False).head(25)
plt.figure(figsize=(12,6))
squarify.plot(sizes=club_group.values, label=club_group.index, alpha=0.8)
plt.title("Club Hierarchy by Average Overall Rating (Treemap)")
plt.axis('off')
plt.show()

"""

    # Lookup table from the menu number to the snippet text.
    questions = {
        1: question1,
        2: question2,
        3: question3,
        4: question4,
        5: question5,
        6: question6,
        7: question7,
        8: question8,
        9: question9,
        10: question10,
    }

    snippet = questions.get(n)
    if snippet is None:
        print(f"Invalid option: {n}. Please choose an integer between 1 and 10.")
    else:
        print(snippet)

    