Python Quick Reference

This notebook provides a curated, compact reference based on a full-length beginner curriculum. By design, it skips the introductory material and serves as a quick refresher on foundational Python operations and concepts.

Sections:

Quick Tips
Variables and Types
Imports
Expressions
Lists
Dictionaries
Tuples and Sets
Functions and Lambda
NumPy Arrays
Pandas Series and DataFrame

Quick Tips

Trap	Correct understanding
`{}`	Creates a dict, not a set. Use `set()` for an empty set
`a = b = [1,2,3]`	Both names point to the same list, mutating one mutates both
`sorted(d)` on a dict	Returns sorted keys, not values
`.loc[0:4]`	Returns 5 rows (0,1,2,3,4), stop is inclusive
`.iloc[0:4]`	Returns 4 rows (0,1,2,3), stop is exclusive
`reshape()`	Returns a view whenever possible (otherwise a copy); changes to the original propagate to a view
`from numpy import *` then `sum([1,2])`	Not a good practice, shadows Python built-in `sum`
`Series + Series` with different index	Unmatched labels become NaN
`fillna(method=)`	`ffill`/`pad` = forward; `bfill`/`backfill` = backward. Deprecated since pandas 2.1: prefer `.ffill()` / `.bfill()`
`np.linspace(0,10,5)`	Produces 5 points including both endpoints by default

References & Resources

Python built-in functions

Variables and Types

In [ ]:

x = 'seven'    
print(type(x))

<class 'str'>

In [ ]:

# Assignment binds a name to an object: after `b = a` both names refer to the
# SAME object, so their ids match. (Separately, CPython caches small ints and
# interned strings, so equal values may share an id even without aliasing;
# that is an implementation detail, not a language guarantee.)
a = 42
b = a
print(id(a) == id(b))   # True: both names point to the same object

# Rebinding b does not touch a; b simply refers to another object now
b = 43
print(id(a) == id(b))   # False: b now points to a different object

In [ ]:

# Line continuation with \
total = 10 + \
        20 + \
        30

Imports

In [11]:

# pip install numpy
# pip install numpy==2.4.4       # Pin a specific version

import numpy
print(numpy.__version__)              # Check installed version

2.4.4

In [12]:

# The four import styles, from most to least explicit.
import math                        # 1. Full module: use as math.pi, math.cos()
import numpy as np                 # 2. Alias:       use as np.array(), np.mean()
from math import pi, sqrt          # 3. Selective:   use as pi, sqrt() - only the named objects
# from numpy import *              # 4. Wildcard:    pollutes namespace, avoid in production
# The wildcard form is shown commented out on purpose: executing it would
# replace built-ins such as sum, min, max, any and all with the NumPy
# versions for every cell that runs afterwards.

import warnings
# NOTE: a blanket 'ignore' also hides deprecation notices from pandas/NumPy.
warnings.filterwarnings('ignore')  # Suppress deprecation/runtime warnings

print(dir(math)[-5:])   # List module names, handy for exploration

['tan', 'tanh', 'tau', 'trunc', 'ulp']

Expressions

In [ ]:

# arithmetic operators
a, b = 9, 4
a / b    # 2.25  - always returns float
a // b   # 2     - floor division
a % b    # 1     - modulo (remainder)
a ** b   # 6561  - exponentiation (9 * 9 * 9 * 9)

Lists

In [15]:

lst = [2, 4, 6, 8]

lst.append(10)          # [2,4,6,8,10]        - adds ONE item to end
lst.extend([12, 14])    # [2,4,6,8,10,12,14]  - merges another iterable
lst.insert(0, 0)        # [0,2,4,6,...]       - insert BEFORE index 0
lst.remove(0)           # removes FIRST occurrence of value 0
lst.pop(2)              # removes and returns item at index 2
lst.pop()               # removes and returns LAST item
lst.index(8)            # returns index of first occurrence, raises ValueError if absent
lst.count(4)            # counts occurrences of 4
lst.reverse()           # in-place reversal
lst.sort()              # in-place sort, sort(reverse=True) for descending

In [ ]:

# Lists as STACK (LIFO): append() to push, pop() to pop from top
stack = [1, 2, 3]
stack.append(4)   # push
stack.pop()       # last in, first out: returns 4

# Lists as QUEUE (FIFO): use collections.deque instead of plain list:
# list.insert(0, x) and list.pop(0) -> O(n), every remaining item is shifted
# deque.appendleft/popleft          -> O(1)
# (list.pop() with no index removes from the END and is O(1))
from collections import deque
q = deque(['a', 'b', 'c'])
q.append('d')     # enqueue to right
q.popleft()       # 'a' pops as first in, first out

Dictionaries

In [ ]:

d = {'alpha': 1, 'beta': 2, 'gamma': 3}

d['alpha']           # returns 1, accessed by key
len(d)               # 3
d.keys()             # dict_keys(['alpha', 'beta', 'gamma'])
d.values()           # dict_values([1, 2, 3])
del d['gamma']       # remove key-value pair
d.pop('beta')        # returns 2 and removes it
sorted(d)            # returns sorted list of KEYS (not values)
d.clear()            # empties the dict, d is now {}

Tuples and Sets

In [ ]:

# TUPLES are immutable: they have no append/insert/remove methods
t = (10, 20, 30, 40)
t[0]                # reading is fine
# t[0] = 99         # TypeError: 'tuple' object does not support item assignment
# t.append(50)      # AttributeError: no append method
len(t)              # 4

In [ ]:

# SETS are unordered and mutable, and hold only unique elements
s = {3, 1, 4, 1, 5, 9, 2, 6, 5}   # removes duplicates automatically
print(s)    # display order is not guaranteed to match the literal's order

# Empty set: use set()
# `empty = {}` would create an empty DICT instead
empty = set()
print(type(empty))   # <class 'set'>

x = set('ABCDE')
y = set('CDEFG')

x | y               # x.union(y)
x & y               # x.intersection(y)
x - y               # x.difference(y) - meaning in x but not y
x.isdisjoint(y)     # True if no common elements
y < x               # PROPER subset test (False here); y <= x is y.issubset(x)
x > y               # PROPER superset test (False here); x >= y is x.issuperset(y)

x.add('Z')          # add single element
x.discard('Z')      # remove if present - no error if absent (unlike remove())
x.pop()             # removes and returns an ARBITRARY element

Functions and Lambda

In [18]:

# Standard function definition with a default argument

def get_confidence_band(ma, sd, level=2):
    """Return the (upper, lower) band around ``ma``.

    The band sits ``level`` multiples of ``sd`` above and below ``ma``;
    ``level`` defaults to 2 when the caller does not pass it.
    """
    half_width = level * sd
    return ma + half_width, ma - half_width

In [ ]:

# Lambda: anonymous, single-expression functions
# syntax: lambda arg1, arg2 : expression

sq = lambda x: x ** 2
sq(9)    # 81

# map() - apply function element-wise across one or more iterables
list(map(lambda x, y: x + y, [1, 2, 3], [10, 20, 30]))   # [11, 22, 33]

# filter() - keep elements where function returns True
nums = [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]
list(filter(lambda x: x > 5, nums))     # [8, 13, 21, 34]

tags = ['buy', 'sell', 'sell', 'buy', 'buy']
list(filter(lambda x: x == 'buy', tags))   # ['buy', 'buy', 'buy']

NumPy Arrays

In [ ]:

import numpy as np

# Create from list or tuple
a = np.array([1, 2, 3, 4, 5])        # 1D - shape (5,)
b = np.array([[1,2,3],[4,5,6]])      # 2D - shape (2,3)

# arange vs linspace
np.arange(0, 10, 2)                      # [0,2,4,6,8]          - step-based, EXCLUDES stop
np.linspace(0, 10, 5)                    # [0, 2.5, 5, 7.5, 10] - count-based, INCLUDES stop
np.linspace(0, 10, 5, endpoint=False)    # excludes stop
np.linspace(0, 10, 5, retstep=True)      # returns (array, step_size)

In [ ]:

# Shape, ndim, dtype
a = np.array([[1,2,3],[4,5,6]])
a.shape    # (2, 3)
a.ndim     # 2
a.dtype    # dtype('int64') on most platforms (the default integer is platform-dependent)

# Reshape: total elements must stay the same
np.arange(12).reshape(3, 4)   # 3x4 matrix

# reshape returns a VIEW whenever possible (a copy only when the memory layout
# forces one); with a view, mutations propagate between the two arrays
A = np.arange(6)
B = A.reshape(2, 3)
A[0] = 99
print(B[0, 0])   # prints 99, so B reflects the change in A

In [ ]:

# Special arrays
np.zeros((3, 3))              # all 0.0 floats
np.ones((4, 4), dtype=int)    # all 1 integers
np.identity(4)                # 4x4 identity matrix (diagonal = 1)

In [ ]:

# Indexing and slicing
A = np.arange(50).reshape(5, 10)

A[1, 3]        # row 1, col 3
A[-1, -1]      # last row, last col
A[1, :]        # entire row 1
A[:, 3]        # entire col 3
A[:3, 2:]      # first 3 rows, cols from 2 onward
A[::2, ::3]    # every 2nd row, every 3rd col
A[:, :-2]      # all rows, drop last 2 cols

# stop is always EXCLUSIVE

In [ ]:

# Vectorization: operations apply element-wise without explicit loops
V = np.array([1, 2, 3, 4, 5])
V + 10        # [11, 12, 13, 14, 15]
V * 3         # [ 3,  6,  9, 12, 15]
V ** 2        # [ 1,  4,  9, 16, 25]

# Two arrays: must have same shape OR be broadcastable
A = np.array([[1,2,3],[4,5,6],[7,8,9]])
B = np.array([1,2,3])   # shape (3,) broadcasts across rows of A
A * B   # each row multiplied element-wise by [1,2,3]

# Mismatched non-broadcastable shapes -> ValueError
# np.ones((3,3)) + np.ones((4,4))  -- raises ValueError

In [ ]:

# Column vector trick: changing broadcast direction
B = np.array([1, 2, 3])
B[:, np.newaxis]   # shape changes from (3,) to (3,1) and now broadcasts across columns

# Comparison and logical operations
A = np.array([[11,12],[21,22]])
B = np.array([[11,99],[21,22]])
A == B                          # element-wise boolean array
np.array_equal(A, B)            # True only if ALL elements match
np.logical_and(A > 10, A < 20)  # element-wise AND

Pandas Series and DataFrame

In [ ]:

import pandas as pd
import numpy as np

# Series: 1D labelled array
s = pd.Series([150.0, 2800.0, 720.0], index=['AAPL', 'GOOG', 'TSLA'])

# Adding Series with DIFFERENT indexes creates NaN for unmatched labels
s1 = pd.Series([1, 2], index=['AAPL', 'MSFT'])
s2 = pd.Series([10, 20], index=['MSFT', 'GOOG'])
s1 + s2   # AAPL->NaN, MSFT->12, GOOG->NaN

s.isnull()          # boolean mask of NaN positions
s.dropna()          # drop NaN entries
s.fillna(0)         # replace NaN with scalar
s.apply(np.sqrt)    # apply any function element-wise

In [ ]:

# DataFrame creation: a dict of equal-length lists, one key per column
data = {
    'ticker': ['AAPL', 'GOOG', 'TSLA'],
    'qty':    [100,    50,     200],
    'price':  [150.0, 2800.0, 720.0]
}
df = pd.DataFrame(data)

# Specify column order and custom index at creation time.
# Build from the raw data, not from `df`: passing an existing DataFrame with a
# new index RE-ALIGNS on labels, and since 'r1'..'r3' do not exist in `df`
# every value would come out as NaN.
df2 = pd.DataFrame(data, columns=['price', 'qty'], index=['r1', 'r2', 'r3'])

# Use an existing column as the index
df.set_index('ticker', inplace=True)

In [ ]:

# CSV I/O
df = pd.read_csv('prices.csv')
df.head()    # first 5 rows
df.tail()    # last 5 rows
df.shape     # (rows, cols)

# Drop columns and rows
df.drop(['Volume', 'Adj Close'], axis=1, inplace=True)    # axis=1 targets columns
df.drop(df.index[[2, 4]])                                 # drop rows by position

# Rename columns
df.rename(columns={'Close Price': 'Close', 'Open Price': 'Open'}, inplace=True)

# Sort
df.sort_values(by='Close', ascending=False)

In [ ]:

# .loc vs .iloc 

# .loc is label-based, stop value is included
df.loc[0:4, 'Close']            # rows 0,1,2,3,4 (5 rows) for column 'Close'
df.loc[:, ['Open', 'Close']]    # all rows, two columns by name

# .iloc is position-based (like numpy), stop value is not included
df.iloc[:4]                     # rows 0,1,2,3 (4 rows)
df.iloc[1:3, 2:5]               # rows 1-2, cols 2-4
df.iloc[[1,3,5], [1,3,5]]       # exact row and col positions

In [ ]:

# Missing values
# (fillna(method=...) is deprecated since pandas 2.1; the dedicated
#  .ffill() / .bfill() methods below are the supported spelling)
df.isnull().sum()                       # count NaNs per column
df.fillna(0)                            # fill all NaN with 0
df['Close'].ffill()                     # forward-fill: carry previous value forward
df['Close'].bfill()                     # back-fill: use next valid value
df.dropna()                             # drop any row with at least one NaN
df.dropna(axis=1)                       # drop any column with at least one NaN
df.replace({-9999: np.nan, 0: np.nan})  # replace specific sentinel values

In [ ]:

# Descriptive stats on a Series: most return a scalar;
# mode(), diff() and pct_change() return a Series
col = df['Close']
col.count()          # non-null count
col.mean()           # arithmetic mean
col.median()         # middle value
col.mode()           # may return multiple values (Series)
col.var()            # variance
col.std()            # standard deviation
col.skew()           # >0 right-skewed, <0 left-skewed
col.kurt()           # >0 leptokurtic (fat tails), <0 platykurtic
col.diff()           # first difference: t minus (t-1); Series, first entry NaN
col.pct_change()     # (t - (t-1)) / (t-1); Series, first entry NaN
col.cov(df['Open'])  # covariance with another Series
col.corr(df['Open']) # correlation coefficient with another Series

In [ ]:

# Rolling and expanding windows
# expanding mean converges to the overall mean
# rolling mean reacts faster to recent data
col.rolling(window=20).mean()          # moving average (NaN for first 19 rows)
col.rolling(window=20).std()           # rolling std dev
col.expanding(min_periods=20).mean()   # uses all data up to each point in time

In [ ]:

# GroupBy -> aggregate -> filter -> display
grouped = df.groupby('Sector')

grouped.groups                           # dict of {group_key: [indices]}
grouped['Amount'].agg('mean')            # mean per group
grouped['Amount'].agg(['sum', 'mean'])   # multiple aggregations at once
grouped.filter(lambda x: len(x) >= 3)    # keep only groups with 3 or more rows

for name, group_df in grouped:           # iterate groups
    print(name, group_df.shape)

In [ ]:

# Merge and Concat
left = pd.DataFrame({'id': [1, 2], 'Sector': ['Tech', 'Finance'], 'Value_left': [100, 200]})
right = pd.DataFrame({'id': [2, 3], 'Sector': ['Finance', 'Health'], 'Value_right': [20, 30]})

# merge: SQL-style joins on a key column
pd.merge(left, right, on='id')                      # inner join (default)
pd.merge(left, right, on='Sector', how='left')      # left join
pd.merge(left, right, on='Sector', how='outer')     # outer join -> NaN for missing

df1 = pd.DataFrame({'Sector': ['Tech', 'Finance'], 'Value': [10, 20]})
df2 = pd.DataFrame({'Sector': ['Health', 'Energy'], 'Value': [30, 40]})

# concat: stack DataFrames vertically or horizontally
pd.concat([df1, df2])                               # vertical (axis=0)
pd.concat([df1, df2], axis=1)                       # horizontal (axis=1)
pd.concat([df1, df2], ignore_index=True)            # reset index after stacking
pd.concat([df1, df2], keys=['x', 'y'])              # adds hierarchical index

Python Quick Reference

Sections:

Quick Tips
Variables and Types
Imports
Expressions
Lists
Dictionaries
Tuples and Sets
Functions and Lambda
NumPy Arrays
Pandas Series and DataFrame

Quick Tips

Trap	Correct understanding
`{}`	Creates a dict, not a set. Use `set()` for an empty set
`a = b = [1,2,3]`	Both names point to the same list, mutating one mutates both
`sorted(d)` on a dict	Returns sorted keys, not values
`.loc[0:4]`	Returns 5 rows (0,1,2,3,4), stop is inclusive
`.iloc[0:4]`	Returns 4 rows (0,1,2,3), stop is exclusive
`reshape()`	Returns a view whenever possible (otherwise a copy); changes to the original propagate to a view
`from numpy import *` then `sum([1,2])`	Not a good practice, shadows Python built-in `sum`
`Series + Series` with different index	Unmatched labels become NaN
`fillna(method=)`	`ffill`/`pad` = forward; `bfill`/`backfill` = backward. Deprecated since pandas 2.1: prefer `.ffill()` / `.bfill()`
`np.linspace(0,10,5)`	Produces 5 points including both endpoints by default

References & Resources

Python built-in functions

Variables and Types

In [ ]:

x = 'seven'    
print(type(x))

<class 'str'>

In [ ]:

# Assignment binds a name to an object: after `b = a` both names refer to the
# SAME object, so their ids match. (Separately, CPython caches small ints and
# interned strings, so equal values may share an id even without aliasing;
# that is an implementation detail, not a language guarantee.)
a = 42
b = a
print(id(a) == id(b))   # True: both names point to the same object

# Rebinding b does not touch a; b simply refers to another object now
b = 43
print(id(a) == id(b))   # False: b now points to a different object

In [ ]:

# Line continuation with \
total = 10 + \
        20 + \
        30

Imports

In [11]:

# pip install numpy
# pip install numpy==2.4.4       # Pin a specific version

import numpy
print(numpy.__version__)              # Check installed version

2.4.4

In [12]:

# The four import styles, from most to least explicit.
import math                        # 1. Full module: use as math.pi, math.cos()
import numpy as np                 # 2. Alias:       use as np.array(), np.mean()
from math import pi, sqrt          # 3. Selective:   use as pi, sqrt() - only the named objects
# from numpy import *              # 4. Wildcard:    pollutes namespace, avoid in production
# The wildcard form is shown commented out on purpose: executing it would
# replace built-ins such as sum, min, max, any and all with the NumPy
# versions for every cell that runs afterwards.

import warnings
# NOTE: a blanket 'ignore' also hides deprecation notices from pandas/NumPy.
warnings.filterwarnings('ignore')  # Suppress deprecation/runtime warnings

print(dir(math)[-5:])   # List module names, handy for exploration

['tan', 'tanh', 'tau', 'trunc', 'ulp']

Expressions

In [ ]:

# arithmetic operators
a, b = 9, 4
a / b    # 2.25  - always returns float
a // b   # 2     - floor division
a % b    # 1     - modulo (remainder)
a ** b   # 6561  - exponentiation (9 * 9 * 9 * 9)

Lists

In [15]:

lst = [2, 4, 6, 8]

lst.append(10)          # [2,4,6,8,10]        - adds ONE item to end
lst.extend([12, 14])    # [2,4,6,8,10,12,14]  - merges another iterable
lst.insert(0, 0)        # [0,2,4,6,...]       - insert BEFORE index 0
lst.remove(0)           # removes FIRST occurrence of value 0
lst.pop(2)              # removes and returns item at index 2
lst.pop()               # removes and returns LAST item
lst.index(8)            # returns index of first occurrence, raises ValueError if absent
lst.count(4)            # counts occurrences of 4
lst.reverse()           # in-place reversal
lst.sort()              # in-place sort, sort(reverse=True) for descending

In [ ]:

# Lists as STACK (LIFO): append() to push, pop() to pop from top
stack = [1, 2, 3]
stack.append(4)   # push
stack.pop()       # last in, first out: returns 4

# Lists as QUEUE (FIFO): use collections.deque instead of plain list:
# list.insert(0, x) and list.pop(0) -> O(n), every remaining item is shifted
# deque.appendleft/popleft          -> O(1)
# (list.pop() with no index removes from the END and is O(1))
from collections import deque
q = deque(['a', 'b', 'c'])
q.append('d')     # enqueue to right
q.popleft()       # 'a' pops as first in, first out

Dictionaries

In [ ]:

d = {'alpha': 1, 'beta': 2, 'gamma': 3}

d['alpha']           # returns 1, accessed by key
len(d)               # 3
d.keys()             # dict_keys(['alpha', 'beta', 'gamma'])
d.values()           # dict_values([1, 2, 3])
del d['gamma']       # remove key-value pair
d.pop('beta')        # returns 2 and removes it
sorted(d)            # returns sorted list of KEYS (not values)
d.clear()            # empties the dict, d is now {}

Tuples and Sets

In [ ]:

# TUPLES are immutable: they have no append/insert/remove methods
t = (10, 20, 30, 40)
t[0]                # reading is fine
# t[0] = 99         # TypeError: 'tuple' object does not support item assignment
# t.append(50)      # AttributeError: no append method
len(t)              # 4

In [ ]:

# SETS are unordered and mutable, and hold only unique elements
s = {3, 1, 4, 1, 5, 9, 2, 6, 5}   # removes duplicates automatically
print(s)    # display order is not guaranteed to match the literal's order

# Empty set: use set()
# `empty = {}` would create an empty DICT instead
empty = set()
print(type(empty))   # <class 'set'>

x = set('ABCDE')
y = set('CDEFG')

x | y               # x.union(y)
x & y               # x.intersection(y)
x - y               # x.difference(y) - meaning in x but not y
x.isdisjoint(y)     # True if no common elements
y < x               # PROPER subset test (False here); y <= x is y.issubset(x)
x > y               # PROPER superset test (False here); x >= y is x.issuperset(y)

x.add('Z')          # add single element
x.discard('Z')      # remove if present - no error if absent (unlike remove())
x.pop()             # removes and returns an ARBITRARY element

Functions and Lambda

In [18]:

# Standard function definition with a default argument

def get_confidence_band(ma, sd, level=2):
    """Return the (upper, lower) band around ``ma``.

    The band sits ``level`` multiples of ``sd`` above and below ``ma``;
    ``level`` defaults to 2 when the caller does not pass it.
    """
    half_width = level * sd
    return ma + half_width, ma - half_width

In [ ]:

# Lambda: anonymous, single-expression functions
# syntax: lambda arg1, arg2 : expression

sq = lambda x: x ** 2
sq(9)    # 81

# map() - apply function element-wise across one or more iterables
list(map(lambda x, y: x + y, [1, 2, 3], [10, 20, 30]))   # [11, 22, 33]

# filter() - keep elements where function returns True
nums = [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]
list(filter(lambda x: x > 5, nums))     # [8, 13, 21, 34]

tags = ['buy', 'sell', 'sell', 'buy', 'buy']
list(filter(lambda x: x == 'buy', tags))   # ['buy', 'buy', 'buy']

NumPy Arrays

In [ ]:

import numpy as np

# Create from list or tuple
a = np.array([1, 2, 3, 4, 5])        # 1D - shape (5,)
b = np.array([[1,2,3],[4,5,6]])      # 2D - shape (2,3)

# arange vs linspace
np.arange(0, 10, 2)                      # [0,2,4,6,8]          - step-based, EXCLUDES stop
np.linspace(0, 10, 5)                    # [0, 2.5, 5, 7.5, 10] - count-based, INCLUDES stop
np.linspace(0, 10, 5, endpoint=False)    # excludes stop
np.linspace(0, 10, 5, retstep=True)      # returns (array, step_size)

In [ ]:

# Shape, ndim, dtype
a = np.array([[1,2,3],[4,5,6]])
a.shape    # (2, 3)
a.ndim     # 2
a.dtype    # dtype('int64') on most platforms (the default integer is platform-dependent)

# Reshape: total elements must stay the same
np.arange(12).reshape(3, 4)   # 3x4 matrix

# reshape returns a VIEW whenever possible (a copy only when the memory layout
# forces one); with a view, mutations propagate between the two arrays
A = np.arange(6)
B = A.reshape(2, 3)
A[0] = 99
print(B[0, 0])   # prints 99, so B reflects the change in A

In [ ]:

# Special arrays
np.zeros((3, 3))              # all 0.0 floats
np.ones((4, 4), dtype=int)    # all 1 integers
np.identity(4)                # 4x4 identity matrix (diagonal = 1)

In [ ]:

# Indexing and slicing
A = np.arange(50).reshape(5, 10)

A[1, 3]        # row 1, col 3
A[-1, -1]      # last row, last col
A[1, :]        # entire row 1
A[:, 3]        # entire col 3
A[:3, 2:]      # first 3 rows, cols from 2 onward
A[::2, ::3]    # every 2nd row, every 3rd col
A[:, :-2]      # all rows, drop last 2 cols

# stop is always EXCLUSIVE

In [ ]:

# Vectorization: operations apply element-wise without explicit loops
V = np.array([1, 2, 3, 4, 5])
V + 10        # [11, 12, 13, 14, 15]
V * 3         # [ 3,  6,  9, 12, 15]
V ** 2        # [ 1,  4,  9, 16, 25]

# Two arrays: must have same shape OR be broadcastable
A = np.array([[1,2,3],[4,5,6],[7,8,9]])
B = np.array([1,2,3])   # shape (3,) broadcasts across rows of A
A * B   # each row multiplied element-wise by [1,2,3]

# Mismatched non-broadcastable shapes -> ValueError
# np.ones((3,3)) + np.ones((4,4))  -- raises ValueError

In [ ]:

# Column vector trick: changing broadcast direction
B = np.array([1, 2, 3])
B[:, np.newaxis]   # shape changes from (3,) to (3,1) and now broadcasts across columns

# Comparison and logical operations
A = np.array([[11,12],[21,22]])
B = np.array([[11,99],[21,22]])
A == B                          # element-wise boolean array
np.array_equal(A, B)            # True only if ALL elements match
np.logical_and(A > 10, A < 20)  # element-wise AND

Pandas Series and DataFrame

In [ ]:

import pandas as pd
import numpy as np

# Series: 1D labelled array
s = pd.Series([150.0, 2800.0, 720.0], index=['AAPL', 'GOOG', 'TSLA'])

# Adding Series with DIFFERENT indexes creates NaN for unmatched labels
s1 = pd.Series([1, 2], index=['AAPL', 'MSFT'])
s2 = pd.Series([10, 20], index=['MSFT', 'GOOG'])
s1 + s2   # AAPL->NaN, MSFT->12, GOOG->NaN

s.isnull()          # boolean mask of NaN positions
s.dropna()          # drop NaN entries
s.fillna(0)         # replace NaN with scalar
s.apply(np.sqrt)    # apply any function element-wise

In [ ]:

# DataFrame creation: a dict of equal-length lists, one key per column
data = {
    'ticker': ['AAPL', 'GOOG', 'TSLA'],
    'qty':    [100,    50,     200],
    'price':  [150.0, 2800.0, 720.0]
}
df = pd.DataFrame(data)

# Specify column order and custom index at creation time.
# Build from the raw data, not from `df`: passing an existing DataFrame with a
# new index RE-ALIGNS on labels, and since 'r1'..'r3' do not exist in `df`
# every value would come out as NaN.
df2 = pd.DataFrame(data, columns=['price', 'qty'], index=['r1', 'r2', 'r3'])

# Use an existing column as the index
df.set_index('ticker', inplace=True)

In [ ]:

# CSV I/O
df = pd.read_csv('prices.csv')
df.head()    # first 5 rows
df.tail()    # last 5 rows
df.shape     # (rows, cols)

# Drop columns and rows
df.drop(['Volume', 'Adj Close'], axis=1, inplace=True)    # axis=1 targets columns
df.drop(df.index[[2, 4]])                                 # drop rows by position

# Rename columns
df.rename(columns={'Close Price': 'Close', 'Open Price': 'Open'}, inplace=True)

# Sort
df.sort_values(by='Close', ascending=False)

In [ ]:

# .loc vs .iloc 

# .loc is label-based, stop value is included
df.loc[0:4, 'Close']            # rows 0,1,2,3,4 (5 rows) for column 'Close'
df.loc[:, ['Open', 'Close']]    # all rows, two columns by name

# .iloc is position-based (like numpy), stop value is not included
df.iloc[:4]                     # rows 0,1,2,3 (4 rows)
df.iloc[1:3, 2:5]               # rows 1-2, cols 2-4
df.iloc[[1,3,5], [1,3,5]]       # exact row and col positions

In [ ]:

# Missing values
# (fillna(method=...) is deprecated since pandas 2.1; the dedicated
#  .ffill() / .bfill() methods below are the supported spelling)
df.isnull().sum()                       # count NaNs per column
df.fillna(0)                            # fill all NaN with 0
df['Close'].ffill()                     # forward-fill: carry previous value forward
df['Close'].bfill()                     # back-fill: use next valid value
df.dropna()                             # drop any row with at least one NaN
df.dropna(axis=1)                       # drop any column with at least one NaN
df.replace({-9999: np.nan, 0: np.nan})  # replace specific sentinel values

In [ ]:

# Descriptive stats on a Series: most return a scalar;
# mode(), diff() and pct_change() return a Series
col = df['Close']
col.count()          # non-null count
col.mean()           # arithmetic mean
col.median()         # middle value
col.mode()           # may return multiple values (Series)
col.var()            # variance
col.std()            # standard deviation
col.skew()           # >0 right-skewed, <0 left-skewed
col.kurt()           # >0 leptokurtic (fat tails), <0 platykurtic
col.diff()           # first difference: t minus (t-1); Series, first entry NaN
col.pct_change()     # (t - (t-1)) / (t-1); Series, first entry NaN
col.cov(df['Open'])  # covariance with another Series
col.corr(df['Open']) # correlation coefficient with another Series

In [ ]:

# Rolling and expanding windows
# expanding mean converges to the overall mean
# rolling mean reacts faster to recent data
col.rolling(window=20).mean()          # moving average (NaN for first 19 rows)
col.rolling(window=20).std()           # rolling std dev
col.expanding(min_periods=20).mean()   # uses all data up to each point in time

In [ ]:

# GroupBy -> aggregate -> filter -> display
grouped = df.groupby('Sector')

grouped.groups                           # dict of {group_key: [indices]}
grouped['Amount'].agg('mean')            # mean per group
grouped['Amount'].agg(['sum', 'mean'])   # multiple aggregations at once
grouped.filter(lambda x: len(x) >= 3)    # keep only groups with 3 or more rows

for name, group_df in grouped:           # iterate groups
    print(name, group_df.shape)

In [ ]:

# Merge and Concat
left = pd.DataFrame({'id': [1, 2], 'Sector': ['Tech', 'Finance'], 'Value_left': [100, 200]})
right = pd.DataFrame({'id': [2, 3], 'Sector': ['Finance', 'Health'], 'Value_right': [20, 30]})

# merge: SQL-style joins on a key column
pd.merge(left, right, on='id')                      # inner join (default)
pd.merge(left, right, on='Sector', how='left')      # left join
pd.merge(left, right, on='Sector', how='outer')     # outer join -> NaN for missing

df1 = pd.DataFrame({'Sector': ['Tech', 'Finance'], 'Value': [10, 20]})
df2 = pd.DataFrame({'Sector': ['Health', 'Energy'], 'Value': [30, 40]})

# concat: stack DataFrames vertically or horizontally
pd.concat([df1, df2])                               # vertical (axis=0)
pd.concat([df1, df2], axis=1)                       # horizontal (axis=1)
pd.concat([df1, df2], ignore_index=True)            # reset index after stacking
pd.concat([df1, df2], keys=['x', 'y'])              # adds hierarchical index