Lecture 02 – CS 189, Fall 2025

In this notebook, we will cover the fundamental concepts and essential techniques for using pandas effectively. We will start with creating and inspecting data structures, then move on to accessing, filtering, adding, modifying, sorting, aggregating, and handling missing data. By the end, you will have a solid foundation for performing data analysis tasks using pandas. In the second part of this notebook, we will look into visualization using Matplotlib and Plotly.

pandas¶

pandas is a powerful and flexible open-source data analysis and manipulation tool, built on top of the Python programming language. It provides data structures like DataFrames and Series, and a rich set of functions for working with structured data.

In [6]:
import pandas as pd
import numpy as np

Pandas Data Structures¶

Series, DataFrames, and Indices are the fundamental pandas data structures for storing tabular data and processing it with fast, vectorized operations.

DataFrame¶

Imagine a spreadsheet or a table with rows and columns. That's essentially what a pandas DataFrame is! It's a 2-dimensional data structure that can store data of different types in each column. Think of it as a collection of Series (which are like single columns in a spreadsheet) that share the same index (the row labels).

DataFrames are incredibly useful because they allow us to organize and work with structured data in a clear and efficient way. We can easily access, modify, and analyze data within a DataFrame, just like you would in a spreadsheet, but with the power of Python!

We can create a DataFrame in different ways:

1. From a CSV File¶

Use pd.read_csv() to load data from a CSV file into a DataFrame. You can specify options like index_col, header, and na_values.
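
As a quick illustration of those options, here is a sketch that reads an in-memory CSV standing in for a file on disk (the column names and the `"?"` missing-value marker are hypothetical, not from the course data):

```python
import io
import pandas as pd

# A small in-memory CSV standing in for a file on disk
csv_text = "id,name,score\n1,alice,90\n2,bob,?\n"

# header=0 uses the first line as column names, na_values treats "?" as
# missing, and index_col makes the "id" column the row labels
df = pd.read_csv(io.StringIO(csv_text), header=0, na_values=["?"], index_col="id")
```

After loading, `df.loc[2, "score"]` is `NaN` because the `"?"` was converted to a missing value.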

2. From Scratch¶

Manually define data and structure using pd.DataFrame() with lists, dictionaries, or other data structures.

In [7]:
# Create a DataFrame with event data from a CSV file
event_data = pd.read_csv("data/uc_berkeley_events.csv", index_col="Year")
event_data
Out[7]:
Event Location
Year
1868 Founding of UC Berkeley Berkeley, CA
1914 Completion of Campanile Berkeley, CA
1923 Opening of Memorial Stadium Berkeley, CA
1964 Free Speech Movement Berkeley, CA
2000 Opening of Hearst Memorial Mining Building Berkeley, CA
In [8]:
# Create a DataFrame with information about landmarks at UC Berkeley
data = {
    'Landmark': ['Sather Gate', 'Campanile', 'Doe Library', 'Memorial Glade', 'Sproul Plaza'],
    'Type': ['Gate', 'Tower', 'Library', 'Open Space', 'Plaza'],
    'Height': [30, 307, 80, 0, 0],
    'Year Built': [1910, 1914, 1911, None, 1962]
}
df = pd.DataFrame(data)

df
Out[8]:
Landmark Type Height Year Built
0 Sather Gate Gate 30 1910.0
1 Campanile Tower 307 1914.0
2 Doe Library Library 80 1911.0
3 Memorial Glade Open Space 0 NaN
4 Sproul Plaza Plaza 0 1962.0

Series¶

A Series is a one-dimensional labeled array capable of holding any data type (integers, strings, floating-point numbers, etc.). It can be thought of as a single column of data, similar to a column in a spreadsheet or a database table. Each element in a Series is associated with an index, which acts as a label for that element.

Below, we will create a Series object and explore its two main components:

  1. Values: The actual data stored in the Series.
  2. Index: The labels associated with each data point, which allow for easy access and manipulation.
In [9]:
welcome_series = pd.Series(["welcome", "to", "CS 189"])
In [10]:
welcome_series.index
Out[10]:
RangeIndex(start=0, stop=3, step=1)
In [11]:
welcome_series.values
Out[11]:
array(['welcome', 'to', 'CS 189'], dtype=object)
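
The default RangeIndex above can be replaced with custom labels, which then serve as the lookup keys. A small sketch reusing the landmark heights from this notebook:

```python
import pandas as pd

# A Series with string labels instead of the default RangeIndex
heights = pd.Series([30, 307, 80],
                    index=["Sather Gate", "Campanile", "Doe Library"])

# Label-based access works directly
campanile_height = heights["Campanile"]  # 307
```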

Exploring DataFrame¶

Understanding the structure and content of your DataFrame is an essential first step in data analysis. Here are some key methods to get a quick overview of your DataFrame:

View the first and last rows:

  • Use .head() to display the first few rows of the DataFrame.
  • Use .tail() to display the last few rows.

Inspect the structure:

  • Use .info() to get a summary of the DataFrame, including the number of rows, columns, data types, and memory usage.

Get basic statistics:

  • Use .describe() to generate descriptive statistics for numeric columns, such as mean, standard deviation, and percentiles.

Check dimensions and size:

  • Use .shape to get the number of rows and columns in the DataFrame.
  • Use .size to get the total number of elements in the DataFrame.

Random sampling:

  • Use .sample() to return a random sample of rows from the DataFrame.

Analyze unique values and counts:

  • Use .value_counts() to count unique values in a column.
  • Use .unique() to get an array of unique values in a column.
In [12]:
# Display the first 5 rows
display(df.head())

# Display the last 3 rows
display(df.tail(3))
Landmark Type Height Year Built
0 Sather Gate Gate 30 1910.0
1 Campanile Tower 307 1914.0
2 Doe Library Library 80 1911.0
3 Memorial Glade Open Space 0 NaN
4 Sproul Plaza Plaza 0 1962.0
Landmark Type Height Year Built
2 Doe Library Library 80 1911.0
3 Memorial Glade Open Space 0 NaN
4 Sproul Plaza Plaza 0 1962.0
In [13]:
# Get information about the DataFrame
display(df.info())
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Landmark    5 non-null      object 
 1   Type        5 non-null      object 
 2   Height      5 non-null      int64  
 3   Year Built  4 non-null      float64
dtypes: float64(1), int64(1), object(2)
memory usage: 292.0+ bytes
None
In [14]:
# Get descriptive statistics
display(df.describe())
Height Year Built
count 5.000000 4.000000
mean 83.400000 1924.250000
std 129.200619 25.223997
min 0.000000 1910.000000
25% 0.000000 1910.750000
50% 30.000000 1912.500000
75% 80.000000 1926.000000
max 307.000000 1962.000000
In [15]:
# Get the shape of the DataFrame
print("Shape of df:", df.shape)

# Get the size of the DataFrame
print("Size of df:", df.size)
Shape of df: (5, 4)
Size of df: 20
In [16]:
# Randomly sample 2 rows from the DataFrame
sampled_data = df.sample(n=2)
display(sampled_data)

# Randomly sample 40% of the rows from the DataFrame
sampled_fraction = df.sample(frac=0.4)
display(sampled_fraction)
Landmark Type Height Year Built
1 Campanile Tower 307 1914.0
3 Memorial Glade Open Space 0 NaN
Landmark Type Height Year Built
4 Sproul Plaza Plaza 0 1962.0
0 Sather Gate Gate 30 1910.0
In [17]:
# Get the count of unique values in the 'Type' column of df
type_counts = df['Type'].value_counts()
display(type_counts)
Type
Gate          1
Tower         1
Library       1
Open Space    1
Plaza         1
Name: count, dtype: int64
In [18]:
# Get unique values from the 'Type' column in the df DataFrame
unique_types = df['Type'].unique()
print(unique_types)
['Gate' 'Tower' 'Library' 'Open Space' 'Plaza']

Selecting and Retrieving Data from a DataFrame¶

When working with DataFrames in pandas, there are several ways to select and retrieve data. Each method has its own use case and advantages. Below, we explain the three primary methods: iloc[], loc[], and context-dependent selection.

1. iloc[] - Integer-Location Based Indexing¶

  • Use iloc[] when you want to select data based on the integer positions of rows and columns.
  • Think of it as using row and column numbers in a spreadsheet, starting from 0.
  • Examples:
    • Select a specific cell: df.iloc[0, 1] (first row, second column).
    • Select a range of rows and columns: df.iloc[1:3, 0:2].

2. loc[] - Label-Based Indexing¶

  • Use loc[] when you want to select data based on row and column labels.
  • If you haven't set a custom index, the default integer labels are used, making it look similar to iloc[] (note, though, that loc[] slices include their endpoint, while iloc[] slices exclude it).
  • If you have a custom index (e.g., names or dates), loc[] will use those labels.
  • Examples:
    • Select a specific cell: df.loc[0, 'Age'] (row with index 0, column labeled 'Age').
    • Select a range of rows and columns: df.loc[1:3, 'Name':'City'].

3. Context-Dependent Selection¶

  • This method allows you to select data based on the context of your DataFrame.
  • You can directly use column names to select entire columns or slices of rows.
  • Examples:
    • Select a single column: df['Name'].
    • Select multiple columns: df[['Name', 'Age']].
    • Select a range of rows: df[1:3].
In [19]:
# Accessing an entry by integer location using iloc
display(df.iloc[0, 1])  # Access the value in the first row and second column
'Gate'
In [20]:
# Accessing an entire column using iloc
display(df.iloc[:, 1])  # Access all rows in the second column
0          Gate
1         Tower
2       Library
3    Open Space
4         Plaza
Name: Type, dtype: object
In [21]:
# Accessing an entire row using iloc
display(df.iloc[0])  # Access all columns in the first row
Landmark      Sather Gate
Type                 Gate
Height                 30
Year Built         1910.0
Name: 0, dtype: object
In [22]:
# Accessing a slice of rows using iloc
display(df.iloc[1:3])  # Access rows 1 to 2 (exclusive of 3)
Landmark Type Height Year Built
1 Campanile Tower 307 1914.0
2 Doe Library Library 80 1911.0
In [23]:
# Accessing a slice of columns using iloc
display(df.iloc[:, 1:3])  # Access all rows for columns 1 to 2 (exclusive of 3)
Type Height
0 Gate 30
1 Tower 307
2 Library 80
3 Open Space 0
4 Plaza 0
In [24]:
# Accessing a specific range of rows and columns using iloc
display(df.iloc[1:3, 1:3])  # Access rows 1 to 2 and columns 1 to 2 (exclusive of 3)
Type Height
1 Tower 307
2 Library 80
In [25]:
# Accessing an entry by label using loc
display(df.loc[0, 'Landmark'])  # Access the value in the first row and 'Landmark' column
'Sather Gate'
In [26]:
# Accessing an entire column using loc
display(df.loc[:, 'Landmark'])  # Access all rows in the 'Landmark' column
0       Sather Gate
1         Campanile
2       Doe Library
3    Memorial Glade
4      Sproul Plaza
Name: Landmark, dtype: object
In [27]:
# Accessing an entire row using loc
display(df.loc[0])  # Access all columns in the first row
Landmark      Sather Gate
Type                 Gate
Height                 30
Year Built         1910.0
Name: 0, dtype: object
In [28]:
# Accessing a slice of rows using loc
display(df.loc[1:3])  # Access rows 1 to 3 (inclusive)
Landmark Type Height Year Built
1 Campanile Tower 307 1914.0
2 Doe Library Library 80 1911.0
3 Memorial Glade Open Space 0 NaN
In [29]:
# Accessing a slice of columns using loc
display(df.loc[:, 'Landmark':'Height'])  # Access all rows for columns 'Landmark' to 'Height'
Landmark Type Height
0 Sather Gate Gate 30
1 Campanile Tower 307
2 Doe Library Library 80
3 Memorial Glade Open Space 0
4 Sproul Plaza Plaza 0
In [30]:
# Accessing a specific range of rows and columns using loc
display(df.loc[1:3, 'Landmark':'Height'])  # Access rows 1 to 3 and columns 'Landmark' to 'Height'
Landmark Type Height
1 Campanile Tower 307
2 Doe Library Library 80
3 Memorial Glade Open Space 0
In [31]:
# Accessing a single column using context-dependent selection
display(df['Year Built'])  # Access the 'Year Built' column
0    1910.0
1    1914.0
2    1911.0
3       NaN
4    1962.0
Name: Year Built, dtype: float64
In [32]:
# Accessing multiple columns using context-dependent selection
display(df[['Landmark', 'Year Built']])  # Access the 'Landmark' and 'Year Built' columns
Landmark Year Built
0 Sather Gate 1910.0
1 Campanile 1914.0
2 Doe Library 1911.0
3 Memorial Glade NaN
4 Sproul Plaza 1962.0
In [33]:
# Accessing a slice of rows using context-dependent selection
display(df[1:3])  # Access rows 1 to 2 (exclusive of 3)
Landmark Type Height Year Built
1 Campanile Tower 307 1914.0
2 Doe Library Library 80 1911.0

Filtering Data in a DataFrame¶

Filtering is a powerful technique in pandas that allows you to extract specific rows from your DataFrame based on conditions. This is particularly useful when you want to focus on a subset of your data that meets certain criteria.

For example, you can filter rows where a column's value is greater than a threshold, matches a specific value, or satisfies multiple conditions. Filtering can also be combined with logical operators like & (AND), | (OR), and ~ (NOT) to create complex queries.

In [34]:
# Filter by a single condition
display(df[df['Height'] > 50])
Landmark Type Height Year Built
1 Campanile Tower 307 1914.0
2 Doe Library Library 80 1911.0
In [35]:
# Filter by multiple conditions
display(df[(df['Height'] > 50) & (df['Type'] == 'Library')])
Landmark Type Height Year Built
2 Doe Library Library 80 1911.0
In [36]:
# Filter using isin()
display(df[df['Type'].isin(['Gate', 'Plaza'])])
Landmark Type Height Year Built
0 Sather Gate Gate 30 1910.0
4 Sproul Plaza Plaza 0 1962.0
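
The | (OR) and ~ (NOT) operators mentioned above follow the same pattern; a sketch that rebuilds df so it runs on its own:

```python
import pandas as pd

df = pd.DataFrame({
    'Landmark': ['Sather Gate', 'Campanile', 'Doe Library', 'Memorial Glade', 'Sproul Plaza'],
    'Type': ['Gate', 'Tower', 'Library', 'Open Space', 'Plaza'],
    'Height': [30, 307, 80, 0, 0],
    'Year Built': [1910, 1914, 1911, None, 1962],
})

# OR: landmarks taller than 50 feet or classified as a Plaza
tall_or_plaza = df[(df['Height'] > 50) | (df['Type'] == 'Plaza')]

# NOT: everything except open spaces
not_open = df[~(df['Type'] == 'Open Space')]
```

As with &, each condition must be wrapped in parentheses so that the comparison is evaluated before the bitwise operator.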

DataFrame Modification¶

We can modify a DataFrame by:

  1. Adding new columns.
  2. Performing calculations to create new data.
  3. Modifying existing values.
  4. Dropping an existing column.
In [37]:
# Add a new column
df['Experience'] = [2, 5, 1, 8, 4]
display(df)
Landmark Type Height Year Built Experience
0 Sather Gate Gate 30 1910.0 2
1 Campanile Tower 307 1914.0 5
2 Doe Library Library 80 1911.0 1
3 Memorial Glade Open Space 0 NaN 8
4 Sproul Plaza Plaza 0 1962.0 4
In [38]:
# Add a calculated column
df['Height_Increase'] = df['Height'] * 0.10
display(df)
Landmark Type Height Year Built Experience Height_Increase
0 Sather Gate Gate 30 1910.0 2 3.0
1 Campanile Tower 307 1914.0 5 30.7
2 Doe Library Library 80 1911.0 1 8.0
3 Memorial Glade Open Space 0 NaN 8 0.0
4 Sproul Plaza Plaza 0 1962.0 4 0.0
In [39]:
# Modify existing values using .loc[]
df.loc[df['Landmark'] == 'Sather Gate', 'Height_Increase'] = 5
display(df)
Landmark Type Height Year Built Experience Height_Increase
0 Sather Gate Gate 30 1910.0 2 5.0
1 Campanile Tower 307 1914.0 5 30.7
2 Doe Library Library 80 1911.0 1 8.0
3 Memorial Glade Open Space 0 NaN 8 0.0
4 Sproul Plaza Plaza 0 1962.0 4 0.0
In [40]:
# Drop the 'Experience' column from the DataFrame
# Note: This operation does not modify the original DataFrame since inplace=False by default
df.drop(columns=['Experience'])
display(df)
Landmark Type Height Year Built Experience Height_Increase
0 Sather Gate Gate 30 1910.0 2 5.0
1 Campanile Tower 307 1914.0 5 30.7
2 Doe Library Library 80 1911.0 1 8.0
3 Memorial Glade Open Space 0 NaN 8 0.0
4 Sproul Plaza Plaza 0 1962.0 4 0.0
In [41]:
# Drop the 'Experience' column using the inplace parameter
df.drop(columns=['Experience'], inplace=True)

display(df)
Landmark Type Height Year Built Height_Increase
0 Sather Gate Gate 30 1910.0 5.0
1 Campanile Tower 307 1914.0 30.7
2 Doe Library Library 80 1911.0 8.0
3 Memorial Glade Open Space 0 NaN 0.0
4 Sproul Plaza Plaza 0 1962.0 0.0
In [42]:
# Reassign the DataFrame to drop the 'Height_Increase' column
df_dropped = df.drop(columns=['Height_Increase'])

display(df_dropped)

# Display the original DataFrame to show it remains unchanged after reassignment
display(df)
Landmark Type Height Year Built
0 Sather Gate Gate 30 1910.0
1 Campanile Tower 307 1914.0
2 Doe Library Library 80 1911.0
3 Memorial Glade Open Space 0 NaN
4 Sproul Plaza Plaza 0 1962.0
Landmark Type Height Year Built Height_Increase
0 Sather Gate Gate 30 1910.0 5.0
1 Campanile Tower 307 1914.0 30.7
2 Doe Library Library 80 1911.0 8.0
3 Memorial Glade Open Space 0 NaN 0.0
4 Sproul Plaza Plaza 0 1962.0 0.0
In [43]:
df = df_dropped

df
Out[43]:
Landmark Type Height Year Built
0 Sather Gate Gate 30 1910.0
1 Campanile Tower 307 1914.0
2 Doe Library Library 80 1911.0
3 Memorial Glade Open Space 0 NaN
4 Sproul Plaza Plaza 0 1962.0

Sorting Your DataFrame¶

Sorting organizes your data for better analysis. Use sort_values() to sort by one or more columns in ascending or descending order.

  • Single column: df.sort_values(by='Column', ascending=True)
  • Multiple columns: df.sort_values(by=['Col1', 'Col2'], ascending=[True, False])
In [44]:
# Sort by a single column
display(df.sort_values(by='Height'))
Landmark Type Height Year Built
3 Memorial Glade Open Space 0 NaN
4 Sproul Plaza Plaza 0 1962.0
0 Sather Gate Gate 30 1910.0
2 Doe Library Library 80 1911.0
1 Campanile Tower 307 1914.0
In [45]:
# Sort by multiple columns
display(df.sort_values(by=['Height', 'Type'], ascending=[True, False]))
Landmark Type Height Year Built
4 Sproul Plaza Plaza 0 1962.0
3 Memorial Glade Open Space 0 NaN
0 Sather Gate Gate 30 1910.0
2 Doe Library Library 80 1911.0
1 Campanile Tower 307 1914.0

Handling Missing Values in a DataFrame¶

Missing values are a common issue in real-world datasets. Properly identifying and handling missing values is crucial for ensuring the accuracy and reliability of your analysis.

We will explore techniques to:

  1. Detect missing values in a DataFrame.
  2. Quantify the extent of missing data.
  3. Handle missing values by either removing or imputing them.
In [46]:
# Introduce missing values
df_missing = df.copy()
df_missing.loc[0, 'Year Built'] = np.nan
df_missing.loc[2, 'Height'] = np.nan
df_missing.loc[4, 'Type'] = None
display(df_missing)
Landmark Type Height Year Built
0 Sather Gate Gate 30.0 NaN
1 Campanile Tower 307.0 1914.0
2 Doe Library Library NaN 1911.0
3 Memorial Glade Open Space 0.0 NaN
4 Sproul Plaza None 0.0 1962.0
In [47]:
# Check for missing values
display(df_missing.isnull())

# Count missing values per column
display(df_missing.isnull().sum())
Landmark Type Height Year Built
0 False False False True
1 False False False False
2 False False True False
3 False False False True
4 False True False False
Landmark      0
Type          1
Height        1
Year Built    2
dtype: int64
In [48]:
# Drop rows with missing values
display(df_missing.dropna())
Landmark Type Height Year Built
1 Campanile Tower 307.0 1914.0
In [49]:
# Fill missing values
display(df_missing.fillna(0))
Landmark Type Height Year Built
0 Sather Gate Gate 30.0 0.0
1 Campanile Tower 307.0 1914.0
2 Doe Library Library 0.0 1911.0
3 Memorial Glade Open Space 0.0 0.0
4 Sproul Plaza 0 0.0 1962.0
In [50]:
# Fill missing values with column-specific defaults: the mean for
# 'Year Built', the median for 'Height', and 'Unknown' for 'Type'
display(df_missing.fillna({
    'Year Built': df_missing['Year Built'].mean(),
    'Height': df_missing['Height'].median(),
    'Type': 'Unknown'
}))
Landmark Type Height Year Built
0 Sather Gate Gate 30.0 1929.0
1 Campanile Tower 307.0 1914.0
2 Doe Library Library 15.0 1911.0
3 Memorial Glade Open Space 0.0 1929.0
4 Sproul Plaza Unknown 0.0 1962.0

Aggregation in DataFrame¶

We can perform aggregations on our data, such as calculating means, sums, and other summary statistics. Additionally, we can group data for more advanced analysis.

  1. Aggregation: Operations like mean(), sum(), count(), etc., can be applied to columns of data.

  2. Grouping: We can split data into groups based on some criteria, apply a function to each group, and combine the results.

    • Syntax: Use the groupby() method to group data by one or more columns, and then apply aggregation functions like mean(), sum(), count(), etc.
    • Example:
      • Group by a single column and calculate the mean: df.groupby('Type')['Height'].mean()
      • Group by multiple columns and calculate the sum: df.groupby(['Type', 'Year Built'])['Height'].sum()
      • Count the number of entries in each group: df.groupby('Type').size()

    Grouping is particularly useful for summarizing data and identifying patterns within subsets of your dataset.

Aggregation Functions¶

In pandas, you can apply a wide range of aggregation functions directly to DataFrame columns (just like sum()). Here are the most common ones:

Basic Aggregations¶
  • sum() – sum of values

  • mean() – average

  • median() – median

  • min() – minimum

  • max() – maximum

  • count() – number of non-null entries

  • nunique() – number of unique values

  • prod() – product of all values

Statistical Aggregations¶
  • std() – standard deviation

  • var() – variance

  • sem() – standard error of the mean

  • skew() – skewness

Logical and Index-based Aggregations¶
  • any() – returns True if any value is True

  • all() – returns True if all values are True

  • first() – first non-null value

  • last() – last non-null value

  • idxmin() – index of min value

  • idxmax() – index of max value

In [51]:
# Calculate mean of the 'Height' column
height_mean = df['Height'].mean()
print(height_mean)

# Calculate sum of the 'Height' column
height_sum = df['Height'].sum()
print(height_sum)
83.4
417
In [52]:
# Calculate the standard deviation of the 'Height' column
height_std = df['Height'].std()
print(height_std)
129.2006191935627
In [53]:
# Find the index of the maximum value in the 'Height' column
max_height_index = df['Height'].idxmax()
print("Index of maximum height:", max_height_index)
Index of maximum height: 1
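
A few of the other aggregations listed above, sketched on the same Height values (reproduced inline so the snippet is self-contained):

```python
import pandas as pd

heights = pd.Series([30, 307, 80, 0, 0], name='Height')

n_unique = heights.nunique()      # 4 distinct values (0 appears twice)
min_index = heights.idxmin()      # 3, the first position holding the minimum
any_zero = (heights == 0).any()   # True, since at least one height is zero
```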

Groupby()¶

Now let's investigate grouping rows in a DataFrame using .groupby(). For this purpose, we will use an augmented version of our dataset that also includes landmarks from MIT and Stanford.

In [54]:
augmented_df = pd.read_csv("data/Augmented_Landmarks_DataFrame.csv")
augmented_df
Out[54]:
Landmark Type Height Year Built Campus
0 Sather Gate Gate 30 1910.0 UC Berkeley
1 Campanile Tower 307 1914.0 UC Berkeley
2 Doe Library Library 80 1911.0 UC Berkeley
3 Memorial Glade Open Space 0 NaN UC Berkeley
4 Sproul Plaza Plaza 0 1962.0 UC Berkeley
5 North Gate Gate 25 1909.0 UC Berkeley
6 Moffitt Library Library 60 1970.0 UC Berkeley
7 Faculty Glade Open Space 0 NaN UC Berkeley
8 Lower Sproul Plaza Plaza 0 2015.0 UC Berkeley
9 77 Mass Ave Entrance Gate 15 1939.0 MIT
10 Green Building Tower 295 1964.0 MIT
11 Barker Library Library 0 1916.0 MIT
12 Killian Court Open Space 0 1920.0 MIT
13 Stata Center Courtyard Plaza 0 2004.0 MIT
14 Palm Drive Entrance Gate 20 1890.0 Stanford
15 Hoover Tower Tower 285 1941.0 Stanford
16 Green Library Library 0 1919.0 Stanford
17 Main Quad Open Space 0 1891.0 Stanford
18 White Plaza Plaza 0 1964.0 Stanford
In [55]:
# Group the DataFrame by the 'Type' column
grouped = augmented_df.groupby('Type')

# Iterate through each group and display its content
for group_name, group_data in grouped:
    print(f"Group: {group_name}")
    display(group_data)
Group: Gate
Landmark Type Height Year Built Campus
0 Sather Gate Gate 30 1910.0 UC Berkeley
5 North Gate Gate 25 1909.0 UC Berkeley
9 77 Mass Ave Entrance Gate 15 1939.0 MIT
14 Palm Drive Entrance Gate 20 1890.0 Stanford
Group: Library
Landmark Type Height Year Built Campus
2 Doe Library Library 80 1911.0 UC Berkeley
6 Moffitt Library Library 60 1970.0 UC Berkeley
11 Barker Library Library 0 1916.0 MIT
16 Green Library Library 0 1919.0 Stanford
Group: Open Space
Landmark Type Height Year Built Campus
3 Memorial Glade Open Space 0 NaN UC Berkeley
7 Faculty Glade Open Space 0 NaN UC Berkeley
12 Killian Court Open Space 0 1920.0 MIT
17 Main Quad Open Space 0 1891.0 Stanford
Group: Plaza
Landmark Type Height Year Built Campus
4 Sproul Plaza Plaza 0 1962.0 UC Berkeley
8 Lower Sproul Plaza Plaza 0 2015.0 UC Berkeley
13 Stata Center Courtyard Plaza 0 2004.0 MIT
18 White Plaza Plaza 0 1964.0 Stanford
Group: Tower
Landmark Type Height Year Built Campus
1 Campanile Tower 307 1914.0 UC Berkeley
10 Green Building Tower 295 1964.0 MIT
15 Hoover Tower Tower 285 1941.0 Stanford
Grouping by One Column¶
In [56]:
augmented_df.groupby('Type')[['Height']].mean()
Out[56]:
Height
Type
Gate 22.500000
Library 35.000000
Open Space 0.000000
Plaza 0.000000
Tower 295.666667
In [57]:
augmented_df.groupby('Type')[['Height', 'Year Built']].mean()
Out[57]:
Height Year Built
Type
Gate 22.500000 1912.000000
Library 35.000000 1929.000000
Open Space 0.000000 1905.500000
Plaza 0.000000 1986.250000
Tower 295.666667 1939.666667
Grouping by Multiple Columns¶
In [58]:
augmented_df.groupby(['Type', 'Campus'])[['Height']].agg('max')
Out[58]:
Height
Type Campus
Gate MIT 15
Stanford 20
UC Berkeley 30
Library MIT 0
Stanford 0
UC Berkeley 80
Open Space MIT 0
Stanford 0
UC Berkeley 0
Plaza MIT 0
Stanford 0
UC Berkeley 0
Tower MIT 295
Stanford 285
UC Berkeley 307
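
Beyond a single function, .agg() also accepts a list of functions, yielding one output column per aggregate; a sketch using a few rows that mimic augmented_df:

```python
import pandas as pd

# A few rows standing in for augmented_df
campus_df = pd.DataFrame({
    'Type': ['Gate', 'Gate', 'Tower', 'Tower', 'Tower'],
    'Height': [30, 20, 307, 295, 285],
})

# One aggregate column per function for each group
summary = campus_df.groupby('Type')['Height'].agg(['min', 'max', 'mean'])
```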

Pivot Tables in pandas¶

A pivot table is a powerful data summarization tool that allows you to reorganize and aggregate data in a DataFrame. It is particularly useful for analyzing and summarizing large datasets by grouping data and applying aggregation functions.

Syntax: pd.pivot_table(data, values='column_to_aggregate', index='row_index', columns='column_headers', aggfunc='aggregation_function')

  1. Index: Rows of the pivot table, typically representing unique values from one or more columns.
  2. Columns: Columns of the pivot table, typically representing unique values from another column.
  3. Values: The data to be aggregated, typically numeric columns.
  4. Aggregation Function: The function applied to summarize the data, such as mean, sum, count, etc.
In [59]:
# Create a pivot table to summarize the maximum Height for each Type and Campus
pivot_table = pd.pivot_table(
    augmented_df,
    index='Type',
    columns='Campus',
    values='Height',
    aggfunc='max'
)

# Display the pivot table
display(pivot_table)
Campus MIT Stanford UC Berkeley
Type
Gate 15 20 30
Library 0 0 80
Open Space 0 0 0
Plaza 0 0 0
Tower 295 285 307
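
Two optional pivot_table arguments worth knowing are fill_value (replaces the NaN cells where a Type/Campus combination has no rows) and margins (appends an 'All' row and column of overall aggregates); a sketch on a small stand-in table:

```python
import pandas as pd

# A small stand-in with one empty Type/Campus combination (no Stanford tower)
small_df = pd.DataFrame({
    'Type': ['Gate', 'Gate', 'Tower'],
    'Campus': ['MIT', 'Stanford', 'MIT'],
    'Height': [15, 20, 295],
})

# fill_value fills empty combinations with 0; margins adds 'All' summaries
pt = pd.pivot_table(small_df, index='Type', columns='Campus',
                    values='Height', aggfunc='max',
                    fill_value=0, margins=True)
```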

Joining DataFrames in pandas¶

Joining DataFrames is a common operation when working with multiple datasets. It allows you to combine data from different sources based on a common key or index. pandas provides several methods for joining DataFrames, including merge(), join(), and concatenation.

Types of Joins:¶

  1. Inner Join: Returns only the rows with matching keys in both DataFrames.
  2. Outer Join: Returns all rows from both DataFrames, filling missing values with NaN where there is no match.
  3. Left Join: Returns all rows from the left DataFrame and matching rows from the right DataFrame.
  4. Right Join: Returns all rows from the right DataFrame and matching rows from the left DataFrame.

Below, we demonstrate how to join two DataFrames: landmarks and event_data.

In [60]:
landmarks = df.copy()
landmarks
Out[60]:
Landmark Type Height Year Built
0 Sather Gate Gate 30 1910.0
1 Campanile Tower 307 1914.0
2 Doe Library Library 80 1911.0
3 Memorial Glade Open Space 0 NaN
4 Sproul Plaza Plaza 0 1962.0
Inner Join¶
In [61]:
# Reset the index of event_data, turning the 'Year' index back into a regular column
event_data.reset_index(inplace=True)
event_data
Out[61]:
Year Event Location
0 1868 Founding of UC Berkeley Berkeley, CA
1 1914 Completion of Campanile Berkeley, CA
2 1923 Opening of Memorial Stadium Berkeley, CA
3 1964 Free Speech Movement Berkeley, CA
4 2000 Opening of Hearst Memorial Mining Building Berkeley, CA
In [62]:
# Perform an inner join using the join method
result_join_inner = landmarks.join(event_data.set_index('Year'), on='Year Built', how='inner')

# Display the result
result_join_inner
Out[62]:
Landmark Type Height Year Built Event Location
1 Campanile Tower 307 1914.0 Completion of Campanile Berkeley, CA
In [63]:
# Perform an inner join using the merge function
result_merge_inner = landmarks.merge(event_data, how='inner', left_on='Year Built', right_on='Year')

# Display the result
display(result_merge_inner)
Landmark Type Height Year Built Year Event Location
0 Campanile Tower 307 1914.0 1914 Completion of Campanile Berkeley, CA
Outer Join¶
In [64]:
# Perform an outer join
result_join_outer = landmarks.join(event_data.set_index('Year'), on='Year Built', how='outer')

# Display the result
display(result_join_outer)
Landmark Type Height Year Built Event Location
NaN NaN NaN NaN 1868.0 Founding of UC Berkeley Berkeley, CA
0.0 Sather Gate Gate 30.0 1910.0 NaN NaN
2.0 Doe Library Library 80.0 1911.0 NaN NaN
1.0 Campanile Tower 307.0 1914.0 Completion of Campanile Berkeley, CA
NaN NaN NaN NaN 1923.0 Opening of Memorial Stadium Berkeley, CA
4.0 Sproul Plaza Plaza 0.0 1962.0 NaN NaN
NaN NaN NaN NaN 1964.0 Free Speech Movement Berkeley, CA
NaN NaN NaN NaN 2000.0 Opening of Hearst Memorial Mining Building Berkeley, CA
3.0 Memorial Glade Open Space 0.0 NaN NaN NaN
In [65]:
# Perform an outer join using the merge function
result_merge_outer = landmarks.merge(event_data, left_on='Year Built', right_on='Year', how='outer')

# Display the result
display(result_merge_outer)
Landmark Type Height Year Built Year Event Location
0 NaN NaN NaN NaN 1868.0 Founding of UC Berkeley Berkeley, CA
1 Sather Gate Gate 30.0 1910.0 NaN NaN NaN
2 Doe Library Library 80.0 1911.0 NaN NaN NaN
3 Campanile Tower 307.0 1914.0 1914.0 Completion of Campanile Berkeley, CA
4 NaN NaN NaN NaN 1923.0 Opening of Memorial Stadium Berkeley, CA
5 Sproul Plaza Plaza 0.0 1962.0 NaN NaN NaN
6 NaN NaN NaN NaN 1964.0 Free Speech Movement Berkeley, CA
7 NaN NaN NaN NaN 2000.0 Opening of Hearst Memorial Mining Building Berkeley, CA
8 Memorial Glade Open Space 0.0 NaN NaN NaN NaN
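
The left and right joins listed above work the same way; here is a sketch of a left join with merge(), rebuilding two small tables so the snippet stands alone:

```python
import pandas as pd

landmarks = pd.DataFrame({
    'Landmark': ['Sather Gate', 'Campanile', 'Doe Library'],
    'Year Built': [1910, 1914, 1911],
})
events = pd.DataFrame({
    'Year': [1914, 1923],
    'Event': ['Completion of Campanile', 'Opening of Memorial Stadium'],
})

# Every landmark row is kept; Event is NaN where no year matches
left = landmarks.merge(events, left_on='Year Built', right_on='Year', how='left')
```

A right join (how='right') would instead keep every row of events, filling the landmark columns with NaN where no year matches.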

Visualization¶

Building visualizations is an important part of machine learning. In this notebook, we will explore how to create interactive visualizations using Plotly, a powerful library for creating dynamic and engaging plots.

Critically, we will learn how to use Plotly to visualize data in a way that helps us debug machine learning algorithms and communicate results effectively.

There are many different visualization libraries available in Python, but Plotly stands out for its interactivity and ease of use. It allows us to create plots that can be easily shared and embedded in web applications. However, you will likely also encounter Matplotlib and its friendlier Seaborn wrapper. Matplotlib is a more traditional plotting library that is widely used in the Python community. It is a good choice for creating static plots, but it does not offer the same level of interactivity as Plotly. Seaborn is a higher-level interface to Matplotlib that makes it easier to create complex visualizations with less code.

We have chosen to prioritize Plotly in this course because we believe it is important to be able to interact with your data as you explore it.

Toy Data¶

Here we will use the auto-mpg dataset from the UCI Machine Learning Repository, which contains information about various cars, including their miles per gallon (MPG), number of cylinders, horsepower, and more. This dataset is commonly used for regression tasks in machine learning.

In [68]:
mpg = pd.read_csv("hf://datasets/scikit-learn/auto-mpg/auto-mpg.csv")
mpg['origin'] = mpg['origin'].map({1: 'USA', 2: 'Europe', 3: 'Japan'})
mpg
Out[68]:
mpg cylinders displacement horsepower weight acceleration model year origin car name
0 18.0 8 307.0 130 3504 12.0 70 USA chevrolet chevelle malibu
1 15.0 8 350.0 165 3693 11.5 70 USA buick skylark 320
2 18.0 8 318.0 150 3436 11.0 70 USA plymouth satellite
3 16.0 8 304.0 150 3433 12.0 70 USA amc rebel sst
4 17.0 8 302.0 140 3449 10.5 70 USA ford torino
... ... ... ... ... ... ... ... ... ...
393 27.0 4 140.0 86 2790 15.6 82 USA ford mustang gl
394 44.0 4 97.0 52 2130 24.6 82 Europe vw pickup
395 32.0 4 135.0 84 2295 11.6 82 USA dodge rampage
396 28.0 4 120.0 79 2625 18.6 82 USA ford ranger
397 31.0 4 119.0 82 2720 19.4 82 USA chevy s-10

398 rows × 9 columns

Matplotlib and Seaborn¶

Matplotlib¶

Matplotlib is a versatile Python library for creating static, animated, and interactive visualizations. It offers a low-level interface for highly customizable plots, suitable for publication-quality visualizations.

Types of Plots:¶
  • Line Plots
  • Scatter Plots
  • Bar Charts
  • Histograms
  • Box Plots
  • Heatmaps

Seaborn¶

Seaborn is a high-level interface built on top of Matplotlib, designed for statistical data visualization. It provides an intuitive interface and aesthetically pleasing default styles, working seamlessly with Pandas DataFrames.

Types of Plots:¶
  • Relational Plots (scatter, line)
  • Categorical Plots (bar, box, violin, swarm)
  • Distribution Plots (histograms, KDE, rug)
  • Regression Plots
  • Heatmaps

Matplotlib offers more control, while Seaborn simplifies the creation of visually appealing plots.
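To make the trade-off concrete, here is a rough sketch of the "more control" route: with plain Matplotlib you aggregate the data yourself and place each element explicitly. The mini-DataFrame below is a hypothetical stand-in for the mpg data, not the real dataset:

```python
import matplotlib
matplotlib.use("Agg")  # non-interactive backend so this runs headless
import matplotlib.pyplot as plt
import pandas as pd

# Hypothetical mini-dataset standing in for mpg; column names match the notebook.
df = pd.DataFrame({
    "origin": ["USA", "USA", "Europe", "Japan", "Japan"],
    "mpg": [18.0, 15.0, 26.0, 31.0, 33.0],
})

# Matplotlib route: aggregate explicitly, then draw and label each piece yourself.
means = df.groupby("origin")["mpg"].mean()
fig, ax = plt.subplots()
ax.bar(means.index, means.values)
ax.set_title("Average MPG by Origin")
ax.set_ylabel("Average MPG")
```

Seaborn collapses the aggregation and drawing into one call (roughly `sns.barplot(data=df, x="origin", y="mpg")`), which is exactly the convenience-versus-control trade-off described above.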

In [69]:
import matplotlib.pyplot as plt 
import seaborn as sns
In [70]:
# Line Plot
sns.lineplot(data=mpg, x='model year', y='mpg', hue='origin', marker='o')
plt.title('Average MPG by Model Year and Origin')
plt.xlabel('Model Year')
plt.ylabel('Miles Per Gallon (MPG)')
plt.legend(title='Origin')
plt.show()
In [72]:
# Bar Chart
mpg.groupby('origin')['mpg'].mean().plot(kind='bar', color=['blue', 'orange', 'green'])
plt.title('Average MPG by Origin')
plt.ylabel('Average MPG')
plt.xlabel('Origin')
plt.show()
In [73]:
# Histogram
sns.histplot(data=mpg, x='mpg', hue='origin', element='step', stat='count', common_norm=False)
plt.title('MPG Distribution by Origin')
plt.xlabel('Miles Per Gallon (MPG)')
plt.ylabel('Count')
plt.show()
In [74]:
# Box Plot
sns.boxplot(data=mpg, x='origin', y='mpg', hue='origin', palette='Set2')
plt.title('MPG Distribution by Origin')
plt.xlabel('Origin')
plt.ylabel('Miles Per Gallon (MPG)')
plt.show()
In [75]:
# Heatmap
corr = mpg[['mpg', 'cylinders', 'displacement', 'weight', 'acceleration']].corr()
sns.heatmap(corr, annot=True, cmap='coolwarm', fmt='.2f')
plt.title('Correlation Heatmap')
plt.show()
In [82]:
# Scatter Plot
sns.scatterplot(data=mpg, 
                x='weight', y='mpg', 
                hue='origin', 
                size='cylinders')
plt.title('MPG vs. Weight by Origin')
plt.xlabel('Weight (lbs)')
plt.ylabel('Miles Per Gallon (MPG)')
plt.legend(title='Origin')
plt.show()

Value of Interactive Visualizations¶

Static visualizations are great for presenting results, but they can be limiting when it comes to exploring data. Interactive visualizations allow us to:

  • Zoom and Pan: Focus on specific areas of the plot.
  • Hover for Details: Get more information about specific data points.
  • Filter Data: Select subsets of data to visualize.
  • Change Parameters: Adjust parameters dynamically to see how they affect the visualization.

These features make it easier to understand complex datasets and identify patterns or anomalies.

Three Modes for Plotly¶

There are three modes for using Plotly that we will explore in this course:

  1. Pandas Plotting: A convenient interface for creating Plotly visualizations directly from Pandas DataFrames. It simplifies the process of generating plots from tabular data.
  2. Plotly Express: A high-level interface for creating plots with minimal code, ideal for quick visualizations and exploratory data analysis. It plays a role similar to Seaborn's for Matplotlib: a higher-level API that makes complex visualizations easier to build. Like pandas plotting, it works seamlessly with pandas DataFrames and provides a simple API for a wide range of plot types.
  3. Graph Objects: A more flexible and powerful interface that allows for fine-grained control over the appearance and behavior of plots. It is suitable for creating complex visualizations and custom layouts.

Using pandas Plotting¶

To use Plotly in pandas, you need to set the plotting backend to Plotly. This allows you to use the plot method on pandas DataFrames to create interactive plots.

In [78]:
pd.set_option('plotting.backend', 'plotly')

Now we can use the plot method on our DataFrame to create interactive plots.

In [83]:
mpg.plot(
    kind='scatter',
    x='weight', y='mpg', 
    color='origin', 
    size='cylinders',
    title='MPG vs. Weight by Origin',
    width=800, height=600)
Out[83]:

Notice how we specify the kind of plot as well as how the data should be mapped to the axes, color, and size. This is an interactive plot, so you can mouse over points to see more information, double- or triple-click entries in the legend to hide and show different series, and click and drag on the plot area to zoom in and out. Here we also set the width and height of the plot to make it larger and more readable.

Creating an Interactive Scatter Plot with Plotly Express¶

Plotly Express is closely related to pandas plotting, but it is a separate library that provides a high-level interface for creating plots. It is designed to work seamlessly with pandas DataFrames and provides a simple API for creating a wide range of visualizations. Plotly Express offers more flexibility and customization options than pandas plotting, making it a powerful tool for creating complex visualizations.

Key components:

  1. px.scatter: Generates a scatter plot to visualize relationships between two numerical variables.
  2. Parameters:
    • mpg: Dataset containing car information.
    • x='weight': X-axis represents car weight.
    • y='mpg': Y-axis represents miles per gallon.
    • color='origin': Groups points by car origin.
    • size='cylinders': Marker size reflects the number of cylinders.
    • size_max=12: Limits marker size.
    • hover_data=mpg.columns: Displays all dataset columns on hover.
    • title='MPG vs. Weight by Origin': Adds a plot title.
    • labels={'weight': 'Weight (lbs)', 'mpg': 'Miles Per Gallon (MPG)'}: Customizes axis labels.
    • width=800, height=600: Sets plot dimensions.
In [84]:
import plotly.express as px

px.scatter(mpg, x='weight', y='mpg', color='origin', 
           size='cylinders', size_max=12,
           hover_data=mpg.columns,
           title='MPG vs. Weight by Origin',
           labels={'weight': 'Weight (lbs)', 'mpg': 'Miles Per Gallon (MPG)'},
           width=800, height=600)
Out[84]:

All the basic plotting functions in pandas and plotly express return Figure objects, which can be further customized using the methods available in the plotly.graph_objects module. We can use update_layout to update some parameters in the figure.

Key Components:

  1. Parameters:

    • animation_frame='model year': Animates the plot over the model year column, showing changes over time.
  2. fig.update_layout:

    • xaxis_title and yaxis_title: Sets the axis titles.
    • xaxis_range and yaxis_range: Defines the range of the x and y axes.
    • legend_title_text: Sets the title for the legend.
In [ ]:
fig = px.scatter(mpg, x='weight', y='mpg', color='origin',
                 hover_data=mpg.columns,
                 animation_frame='model year', 
                 title='MPG vs. Weight by Origin',
                 labels={'weight': 'Weight (lbs)', 'mpg': 'Miles Per Gallon (MPG)'},
                 width=800, height=600)
fig.update_layout(
    xaxis_title='Weight (lbs)',
    yaxis_title='Miles Per Gallon (MPG)',
    xaxis_range=[1500, 5000],
    yaxis_range=[10, 50],
    legend_title_text='Origin',
)
fig.show()
In [ ]:
fig = mpg.plot(
    kind='scatter',
    x='weight', y='mpg', color='origin', size='cylinders',
    title='MPG vs. Weight by Origin',
    width=800, height=600)

# change to the style
fig.update_layout(template='plotly_dark')
# fig.update_layout(template='plotly_white')
# fig.update_layout(template='ggplot2')
# fig.update_layout(template='seaborn')
fig.update_layout(xaxis_title='Weight (lbs)',
                  yaxis_title='Miles per Gallon (MPG)',
                  legend_title='Origin')
fig.show()

We can also save plots to HTML files, which can be shared and embedded in web applications. This is useful for creating interactive reports and dashboards.

In [74]:
fig.write_html('mpg_scatter.html', include_plotlyjs='cdn')
# Static image export (PNG/PDF) additionally requires the kaleido package
fig.write_image('mpg_scatter.png', scale=2, width=800, height=600)
fig.write_image('mpg_scatter.pdf', scale=2, width=800, height=600)

The figure object is made of two key components:

  • the data and
  • the layout.

The data is a list of traces, which are the individual plots that make up the figure. The layout is a dictionary that contains information about the appearance of the plot, such as the title, axis labels, and legend.

In [ ]:
display(fig.data)
display(fig.layout)

Just as before, we get back a Figure object that we can further customize.

In [ ]:
fig = px.scatter(mpg, x='weight', y='mpg', color='origin', size='cylinders',
                 title='MPG vs. Weight by Origin',
                 width=800, height=600, 
                 template='plotly_dark')
# change the marker symbol for the USA trace
fig.update_traces(marker=dict(symbol="square"), selector=dict(name="USA")) 
# you can also just modify the data dictionary directly
#fig.data[0]['marker']['symbol'] = "square"

# change formatting (layout) of the figure
fig.update_layout(font=dict(family="Courier New, monospace", size=16))
# You can also refer to the font family and size directly
fig.update_layout(font_family="Courier New, monospace", font_size=16)
fig

Using Plotly Graph Objects¶

Graph objects provide a more flexible and powerful interface that allows for fine-grained control over the appearance and behavior of plots. They are suitable for creating complex visualizations and custom layouts.

A Figure Graphic Object is composed of:

  • Data: A list of traces (e.g., Scatter, Lines, Annotations)
  • Layout: A dictionary describing the overall layout (e.g., title, axis properties, …)
In [77]:
from plotly import graph_objects as go
In [ ]:
fig = go.Figure()
max_size = 20

# Iterate over unique origins and create a scatter trace for each
for i, origin in enumerate(mpg['origin'].unique()):
    # Filter the DataFrame for the current origin
    subset = mpg[mpg['origin'] == origin]
    marker_sizes = max_size * subset['cylinders'] / subset['cylinders'].max()
    # Create a hover text for each point
    hover_text = (
        subset['origin'] + "<br>" +
        "Weight: " + subset['weight'].astype(str) + "<br>" +
        "MPG: " + subset['mpg'].astype(str) + "<br>" +
        "Cylinders: " + subset['cylinders'].astype(str)
    )
    # add a trace to the figure
    fig.add_trace(
        go.Scatter(
            x=subset['weight'], y=subset['mpg'],
            mode='markers',
            name=origin,
            marker=dict(size=marker_sizes, color=i),
            text=hover_text,
        )
    )
fig.add_annotation(
    text="Data source: Auto MPG dataset",
    xref="paper", yref="paper",
    x=0, y=-0.1,
    showarrow=False,
    font=dict(size=12, color="gray")
)
fig.update_layout(
    title='MPG vs. Weight by Origin',
    xaxis_title='Weight (lbs)',
    yaxis_title='Miles per Gallon (MPG)',
    width=800, height=600,
    template='plotly_white',
    font_family="Times", font_size=16,
)
fig.show()

Visualizing Different Kinds of Data¶

Now that we have seen the basics of using Plotly, let's explore how to visualize different kinds of data.

Histograms¶

In [ ]:
px.histogram(mpg, x='mpg', facet_row='origin')
In [ ]:
mpg.hist(x='mpg', color='origin', bins=10, barmode='overlay')
In [ ]:
fig = mpg.hist(x='mpg', color='origin', bins=10, facet_row='origin',
         title='MPG Distribution by Origin',
         width=800, height=600)
fig
In [ ]:
# Extract the make as the first word of the car name
mpg['make'] = mpg['car name'].str.split(' ').str[0]
mpg.plot(kind='bar',
         x='make', color='origin', 
         hover_data=['mpg', 'cylinders', 'car name'],
         title='Average MPG by Make and Origin',
         width=800, height=600)

Scatter and Line Plots¶

In [ ]:
yearly_mpg = (
    mpg
    .groupby(['origin', 'model year'])
    [['mpg', 'displacement', 'weight']]
    .mean().reset_index()
)
yearly_mpg.head()
In [ ]:
px.scatter(yearly_mpg, x='model year', y='mpg', color='origin',
        title='Average MPG by Model Year and Origin',
        width=800, height=600)
In [ ]:
px.line(yearly_mpg, x='model year', y='mpg', color='origin',
        markers=True,
        title='Average MPG by Model Year and Origin',
        width=800, height=600)