Python Quick Reference
This notebook provides a curated, compact reference based on a full-length beginner curriculum. By design, it skips the introductory material and serves as a quick refresher on foundational Python operations and concepts.
Sections:
Quick Tips
| Trap | Correct understanding |
|---|---|
| `{}` | Creates a dict, not a set. Use `set()` for an empty set |
| `a = b = [1,2,3]` | Both names point to the same list; mutating one mutates both |
| `sorted(d)` on a dict | Returns sorted keys, not values |
| `.loc[0:4]` | Returns 5 rows (0,1,2,3,4); stop is inclusive |
| `.iloc[0:4]` | Returns 4 rows (0,1,2,3); stop is exclusive |
| `reshape()` | Returns a view whenever possible (a copy otherwise), so changes to the original usually propagate |
| `from numpy import *` then `sum([1,2])` | Not a good practice; shadows Python's built-in `sum` |
| `Series + Series` with different index | Unmatched labels become NaN |
| `fillna(method=)` | ffill/pad = forward; bfill/backfill = backward (deprecated since pandas 2.1; prefer `.ffill()` / `.bfill()`) |
| `np.linspace(0,10,5)` | Produces 5 points including both endpoints by default |
References & Resources
Variables and Types
In [ ]:
# type() reports the class of the object a name is currently bound to
x = 'seven'
print(type(x))
<class 'str'>
In [ ]:
# Assignment binds a second name to the SAME object, so the ids match
# (separately, CPython also caches small ints and interns some strings)
a = 42
b = a
print(id(a) == id(b)) # True: both names point to the same object
# Rebinding b leaves a untouched; only the name b moves to a new object
b = 43
print(id(a) == id(b)) # False: b now points to a different object
In [ ]:
# Line continuation with \ (the backslash must be the last character on the line)
# PEP 8 prefers wrapping the expression in parentheses over using backslashes
total = 10 + \
20 + \
30
2. Imports
In [11]:
# pip install numpy
# pip install numpy==2.4.4 # Pin a specific version
import numpy
print(numpy.__version__) # Check installed version
2.4.4
In [12]:
import math # 1. Full module: use as math.pi, math.cos()
import numpy as np # 2. Alias: use as np.array(), np.mean()
from numpy import * # 3. Wildcard: pollutes namespace (shadows built-ins such as sum), avoid in production
import warnings
warnings.filterwarnings('ignore') # Suppresses ALL warnings, including deprecation/runtime ones
print(dir(math)[-5:]) # Last five names defined in the module, handy for exploration
['tan', 'tanh', 'tau', 'trunc', 'ulp']
Expressions
In [ ]:
# arithmetic operators
a, b = 9, 4
a / b # 2.25 - always returns float
a // b # 2 - floor division
a % b # 1 - modulo (remainder)
a ** b # 6561 - exponentiation
4. Lists
In [15]:
# Every method below except index() and count() mutates lst in place
lst = [2, 4, 6, 8]
lst.append(10) # [2,4,6,8,10] - adds ONE item to end
lst.extend([12, 14]) # [2,4,6,8,10,12,14] - merges another iterable
lst.insert(0, 0) # [0,2,4,6,...] - insert BEFORE index 0
lst.remove(0) # removes FIRST occurrence of value 0, raises ValueError if absent
lst.pop(2) # removes and returns item at index 2 (here 6)
lst.pop() # removes and returns LAST item (here 14)
lst.index(8) # returns index of first occurrence (here 2), raises ValueError if absent
lst.count(4) # counts occurrences of 4 (here 1)
lst.reverse() # in-place reversal, returns None
lst.sort() # in-place sort, returns None; sort(reverse=True) for descending
In [ ]:
# Lists as STACK (LIFO): append() to push, pop() to pop from top
stack = [1, 2, 3]
stack.append(4) # push
stack.pop() # last in, first out: returns 4
# Lists as QUEUE (FIFO): use collections.deque instead of plain list:
# list.insert(0, x) and list.pop(0) -> O(n), every remaining item is shifted
# deque.appendleft/popleft -> O(1)
from collections import deque
q = deque(['a', 'b', 'c'])
q.append('d') # enqueue to right
q.popleft() # 'a' pops as first in, first out
Dictionaries
In [ ]:
# Dictionaries map unique keys to values
d = {'alpha': 1, 'beta': 2, 'gamma': 3}
d['alpha'] # returns 1, accessed by key (KeyError if the key is missing)
len(d) # 3
d.keys() # dict_keys(['alpha', 'beta', 'gamma'])
d.values() # dict_values([1, 2, 3])
del d['gamma'] # remove key-value pair
d.pop('beta') # returns 2 and removes it
sorted(d) # returns sorted list of KEYS (not values); here ['alpha']
d.clear() # empties the dict, d is now {}
Tuples and Sets
In [ ]:
# TUPLES are immutable: they do not have append/insert/remove
t = (10, 20, 30, 40)
t[0] # reading is fine (returns 10)
# t[0] = 99 # TypeError: 'tuple' object does not support item assignment
# t.append(50) # AttributeError: no append method
len(t) # 4
In [ ]:
# SETS are unordered and mutable, and hold only unique elements
s = {3, 1, 4, 1, 5, 9, 2, 6, 5} # removes duplicates automatically
print(s) # same initial order is not guaranteed
# Empty set: use set()
# = {} creates an empty DICT
empty = set()
print(type(empty)) # <class 'set'>
x = set('ABCDE')
y = set('CDEFG')
x | y # x.union(y)
x & y # x.intersection(y)
x - y # x.difference(y) - meaning in x but not y
x.isdisjoint(y) # True if no common elements (False here: C, D, E are shared)
y < x # PROPER subset test (False here); y <= x is y.issubset(x)
x > y # PROPER superset test (False here); x >= y is x.issuperset(y)
x.add('Z') # add single element
x.discard('Z') # remove if present - no error if absent (unlike remove())
x.pop() # removes and returns an ARBITRARY element
Functions and Lambda
In [18]:
# Standard function definition with a default argument
def get_confidence_band(ma, sd, level=2):
    """Return the (upper, lower) band around ``ma``.

    Args:
        ma: Centre value of the band (the name suggests a moving average).
        sd: Dispersion used for the band width (the name suggests a
            standard deviation).
        level: Multiple of ``sd`` added to and subtracted from ``ma``.
            Defaults to 2.

    Returns:
        Tuple ``(upper, lower)`` with ``upper = ma + level * sd`` and
        ``lower = ma - level * sd``.
    """
    upper = ma + level * sd
    lower = ma - level * sd
    return (upper, lower)
In [ ]:
# Lambda: anonymous, single-expression functions
# syntax: lambda arg1, arg2 : expression
# (PEP 8 recommends a def statement when the function is bound to a name, as sq is here)
sq = lambda x: x ** 2
sq(9) # 81
# map() - apply function element-wise across one or more iterables; returns a lazy iterator
list(map(lambda x, y: x + y, [1, 2, 3], [10, 20, 30])) # [11, 22, 33]
# filter() - keep elements where function returns True; also returns a lazy iterator
nums = [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]
list(filter(lambda x: x > 5, nums)) # [8, 13, 21, 34]
tags = ['buy', 'sell', 'sell', 'buy', 'buy']
list(filter(lambda x: x == 'buy', tags)) # ['buy', 'buy', 'buy']
NumPy Arrays
In [ ]:
import numpy as np
# Create from list or tuple
a = np.array([1, 2, 3, 4, 5]) # 1D - shape (5,)
b = np.array([[1,2,3],[4,5,6]]) # 2D - shape (2,3)
# arange vs linspace
np.arange(0, 10, 2) # [0,2,4,6,8] - step-based, EXCLUDES stop
np.linspace(0, 10, 5) # [0, 2.5, 5, 7.5, 10] - count-based, INCLUDES stop
np.linspace(0, 10, 5, endpoint=False) # [0, 2, 4, 6, 8] - excludes stop
np.linspace(0, 10, 5, retstep=True) # returns (array, step_size); step is 2.5 here
In [ ]:
# Shape, ndim, dtype
a = np.array([[1,2,3],[4,5,6]])
a.shape # (2, 3)
a.ndim # 2
a.dtype # dtype('int64') - the default integer type on most platforms
# Reshape: total elements must stay the same
np.arange(12).reshape(3, 4) # 3x4 matrix
# reshape returns a VIEW whenever the memory layout allows (otherwise a copy);
# here B is a view of A, so mutations propagate
A = np.arange(6)
B = A.reshape(2, 3)
A[0] = 99
print(B[0, 0]) # prints 99, so B reflects the change in A
In [ ]:
# Special arrays (the shape is passed as a tuple)
np.zeros((3, 3)) # all 0.0 floats
np.ones((4, 4), dtype=int) # all 1 integers
np.identity(4) # 4x4 identity matrix (diagonal = 1.0, float by default)
In [ ]:
# Indexing and slicing
A = np.arange(50).reshape(5, 10)
A[1, 3] # row 1, col 3 (value 13)
A[-1, -1] # last row, last col (value 49)
A[1, :] # entire row 1
A[:, 3] # entire col 3
A[:3, 2:] # first 3 rows, cols from 2 onward - shape (3, 8)
A[::2, ::3] # every 2nd row, every 3rd col - rows 0,2,4 and cols 0,3,6,9
A[:, :-2] # all rows, drop last 2 cols - shape (5, 8)
# stop is always EXCLUSIVE
In [ ]:
# Vectorization: operations apply element-wise without explicit loops
V = np.array([1, 2, 3, 4, 5])
V + 10 # [11, 12, 13, 14, 15]
V * 3 # [ 3, 6, 9, 12, 15]
V ** 2 # [ 1, 4, 9, 16, 25]
# Two arrays: must have same shape OR be broadcastable
# (shapes are compared from the trailing dimension: (3,3) against (3,) is compatible)
A = np.array([[1,2,3],[4,5,6],[7,8,9]])
B = np.array([1,2,3]) # shape (3,) broadcasts across rows of A
A * B # each row multiplied element-wise by [1,2,3]
# Mismatched non-broadcastable shapes -> ValueError
# np.ones((3,3)) + np.ones((4,4)) -- raises ValueError
In [ ]:
# Column vector trick: changing broadcast direction
B = np.array([1, 2, 3])
B[:, np.newaxis] # shape changes from (3,) to (3,1) and now broadcasts across columns
# Comparison and logical operations
A = np.array([[11,12],[21,22]])
B = np.array([[11,99],[21,22]])
A == B # element-wise boolean array: [[True, False], [True, True]]
np.array_equal(A, B) # True only if ALL elements match (False here)
np.logical_and(A > 10, A < 20) # element-wise AND: [[True, True], [False, False]]
Pandas Series and DataFrame
In [ ]:
import pandas as pd
import numpy as np
# Series: 1D labelled array
s = pd.Series([150.0, 2800.0, 720.0], index=['AAPL', 'GOOG', 'TSLA'])
# Adding Series with DIFFERENT indexes creates NaN for unmatched labels
# (the result is indexed by the union of both label sets and becomes float)
s1 = pd.Series([1, 2], index=['AAPL', 'MSFT'])
s2 = pd.Series([10, 20], index=['MSFT', 'GOOG'])
s1 + s2 # AAPL->NaN, MSFT->12, GOOG->NaN
# The calls below return new objects; s itself is left unchanged
s.isnull() # boolean mask of NaN positions
s.dropna() # drop NaN entries
s.fillna(0) # replace NaN with scalar
s.apply(np.sqrt) # apply any function element-wise
In [ ]:
# DataFrame creation
data = {
    'ticker': ['AAPL', 'GOOG', 'TSLA'],
    'qty': [100, 50, 200],
    'price': [150.0, 2800.0, 720.0]
}
df = pd.DataFrame(data)
# Specify column order and custom index at creation time.
# Build from the raw dict: passing an existing DataFrame together with index=
# REINDEXES it, and the labels r1..r3 do not exist in df, so every cell would be NaN.
df2 = pd.DataFrame(data, columns=['price', 'qty'], index=['r1','r2','r3'])
# Use an existing column as the index
df.set_index('ticker', inplace=True)
In [ ]:
# CSV I/O
# NOTE(review): the column names used below ('Volume', 'Adj Close', 'Close Price',
# 'Open Price') are assumed to exist in prices.csv - confirm against the file
df = pd.read_csv('prices.csv')
df.head() # first 5 rows
df.tail() # last 5 rows
df.shape # (rows, cols)
# Drop columns and rows
df.drop(['Volume', 'Adj Close'], axis=1, inplace=True) # axis=1 targets columns
df.drop(df.index[[2, 4]]) # drop rows by position; returns a NEW frame, df is unchanged
# Rename columns
df.rename(columns={'Close Price': 'Close', 'Open Price': 'Open'}, inplace=True)
# Sort
df.sort_values(by='Close', ascending=False) # returns a sorted copy, df is unchanged
In [ ]:
# .loc vs .iloc
# NOTE(review): the row counts below assume df has the default integer index (0, 1, 2, ...) - confirm
# .loc is label-based, stop value is included
df.loc[0:4, 'Close'] # rows 0,1,2,3,4 (5 rows) for column 'Close'
df.loc[:, ['Open', 'Close']] # all rows, two columns by name
# .iloc is position-based (like numpy), stop value is not included
df.iloc[:4] # rows 0,1,2,3 (4 rows)
df.iloc[1:3, 2:5] # rows 1-2, cols 2-4
df.iloc[[1,3,5], [1,3,5]] # exact row and col positions
In [ ]:
# Missing values (every call below returns a new object; df is not modified)
df.isnull().sum() # count NaNs per column
df.fillna(0) # fill all NaN with 0
# fillna(method=...) is deprecated since pandas 2.1; use the dedicated methods instead
df['Close'].ffill() # forward-fill: carry previous value forward
df['Close'].bfill() # back-fill: use next valid value
df.dropna() # drop any row with at least one NaN
df.dropna(axis=1) # drop any column with at least one NaN
df.replace({-9999: np.nan, 0: np.nan}) # replace specific sentinel values
In [ ]:
# Descriptive stats on a Series: most return a scalar;
# mode(), diff() and pct_change() return a Series
col = df['Close']
col.count() # non-null count
col.mean() # arithmetic mean
col.median()
col.mode() # may return multiple values (Series)
col.var() # variance (sample variance, ddof=1 by default)
col.std() # standard deviation (sample, ddof=1 by default)
col.skew() # >0 right-skewed, <0 left-skewed
col.kurt() # excess kurtosis: >0 leptokurtic (fat tails), <0 platykurtic
col.diff() # first difference: t minus (t-1); first element is NaN
col.pct_change() # (t - (t-1)) / (t-1); first element is NaN
col.cov(df['Open']) # covariance with another Series
col.corr(df['Open']) # correlation coefficient with another Series (Pearson by default)
In [ ]:
# Rolling and expanding windows
# expanding mean converges to the overall mean
# rolling mean reacts faster to recent data
col.rolling(window=20).mean() # moving average (NaN for first 19 rows)
col.rolling(window=20).std() # rolling std dev (NaN for first 19 rows as well)
col.expanding(min_periods=20).mean() # uses all data up to each point in time; NaN until 20 observations exist
In [ ]:
# GroupBy -> aggregate -> filter -> display
# NOTE(review): assumes df has 'Sector' and 'Amount' columns - confirm against the data
grouped = df.groupby('Sector')
grouped.groups # dict of {group_key: [indices]}
grouped['Amount'].agg('mean') # mean per group
grouped['Amount'].agg(['sum', 'mean']) # multiple aggregations at once
grouped.filter(lambda x: len(x) >= 3) # keep only groups with 3 or more rows
for name, group_df in grouped: # iterate groups as (key, sub-DataFrame) pairs
    print(name, group_df.shape)
In [ ]:
# Merge and Concat
left = pd.DataFrame({'id': [1, 2], 'Sector': ['Tech', 'Finance'], 'Value_left': [100, 200]})
right = pd.DataFrame({'id': [2, 3], 'Sector': ['Finance', 'Health'], 'Value_right': [20, 30]})
# merge: SQL-style joins on a key column
# (non-key columns present in both frames get the default suffixes _x and _y)
pd.merge(left, right, on='id') # inner join (default); only id 2 matches
pd.merge(left, right, on='Sector', how='left') # left join
pd.merge(left, right, on='Sector', how='outer') # outer join -> NaN for missing
df1 = pd.DataFrame({'Sector': ['Tech', 'Finance'], 'Value': [10, 20]})
df2 = pd.DataFrame({'Sector': ['Health', 'Energy'], 'Value': [30, 40]})
# concat: stack DataFrames vertically or horizontally
pd.concat([df1, df2]) # vertical (axis=0); original index labels 0,1,0,1 are kept
pd.concat([df1, df2], axis=1) # horizontal (axis=1); rows are aligned on the index
pd.concat([df1, df2], ignore_index=True) # reset index after stacking
pd.concat([df1, df2], keys=['x', 'y']) # adds hierarchical index