Machine Learning - Price/Value Prediction (E2E)

Overview

Build a regression pipeline with preprocessing, cross-validation, and a simple serving plan.

Pipeline + Ridge

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.linear_model import Ridge

# Feature groups, routed to separate preprocessing branches below.
num_cols = ["sqft", "bath", "age"]
cat_cols = ["city", "type"]

# Numeric branch: fill missing values with SimpleImputer's default strategy
# (the mean, per the scikit-learn docs), then standardize.
numeric_pipe = Pipeline(
    steps=[
        ("imp", SimpleImputer()),
        ("sc", StandardScaler()),
    ]
)

# Categorical branch: fill missing values with the most frequent category,
# then one-hot encode. handle_unknown="ignore" keeps transform from raising
# on categories that were not present when the encoder was fitted.
categorical_pipe = Pipeline(
    steps=[
        ("imp", SimpleImputer(strategy="most_frequent")),
        ("oh", OneHotEncoder(handle_unknown="ignore")),
    ]
)

# Apply each branch to its own column group.
pre = ColumnTransformer(
    transformers=[
        ("num", numeric_pipe, num_cols),
        ("cat", categorical_pipe, cat_cols),
    ]
)

# Preprocessing and the regressor are bundled into one estimator, so the
# transformers are fitted only on the data passed to fit().
model = Pipeline(
    steps=[
        ("pre", pre),
        ("reg", Ridge(alpha=1.0)),
    ]
)

# NOTE(review): X_train / y_train are not defined in this snippet; X_train is
# presumably a pandas DataFrame containing the columns listed above - confirm.
model.fit(X_train, y_train)