Skip to content

Classification API

obia.classification.classify

obia.classification.classify

ClassifiedImage

class ClassifiedImage

Represents an image along with its classification results and associated properties.

classified: The classified image data. confusion_matrix: The confusion matrix of the classification results. report: A detailed report of the classification results. params: The parameters used during classification. shap_values: SHAP values for the classification results. crs: Coordinate Reference System for the image. transform: Affine transform parameters for the image.

def __init__(self, classified, confusion_matrix, report, shap_values, transform, crs, params): Initializes a new instance of the ClassifiedImage class.

1
2
3
4
5
6
7
8
:param classified: The classified image data.
:param confusion_matrix: The confusion matrix of the classification results.
:param report: A detailed report of the classification results.
:param shap_values: SHAP values for the classification results.
:param transform: Affine transform parameters for the image.
:param crs: Coordinate Reference System for the image.
:param params: The parameters used during classification.
:return: None

def write_geotiff(self, output_path): Writes the classified image to a GeoTIFF file.

1
2
:param output_path: Path where the GeoTIFF file will be saved.
:return: None
Source code in obia/classification/classify.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
class ClassifiedImage:
    """
    Container pairing classified data with the artefacts of the run that produced it.

    Attributes (each defaults to ``None`` at class level and is set per instance
    by the constructor):

    classified: The classified image data.
    confusion_matrix: The confusion matrix of the classification results.
    report: A detailed report of the classification results.
    params: The parameters used during classification.
    shap_values: SHAP values for the classification results.
    crs: Coordinate Reference System for the image.
    transform: Affine transform parameters for the image.
    """
    # Class-level fallbacks; every one of them is shadowed by __init__.
    classified = None
    confusion_matrix = None
    report = None
    params = None
    shap_values = None
    crs = None
    transform = None

    def __init__(self, classified, confusion_matrix, report, shap_values, transform, crs, params):
        """
        Store the classification outputs on the new instance.

        :param classified: The classified image data.
        :param confusion_matrix: The confusion matrix of the classification results.
        :param report: A detailed report of the classification results.
        :param shap_values: SHAP values for the classification results.
        :param transform: Affine transform parameters for the image.
        :param crs: Coordinate Reference System for the image.
        :param params: The parameters used during classification.
        :return: None
        """
        # Classification output and its evaluation artefacts.
        self.classified = classified
        self.confusion_matrix = confusion_matrix
        self.report = report
        self.shap_values = shap_values
        self.params = params

        # Georeferencing information.
        self.crs = crs
        self.transform = transform

    def write_geotiff(self, output_path):
        """
        Write the classified data to a GeoTIFF file via ``_write_geotiff``.

        :param output_path: Path where the GeoTIFF file will be saved.
        :return: None
        """
        _write_geotiff(
            self.classified,
            output_path,
            self.crs,
            self.transform,
        )

write_geotiff(output_path)

:param output_path: Path where the GeoTIFF file will be saved. :return: None

Source code in obia/classification/classify.py
59
60
61
62
63
64
def write_geotiff(self, output_path):
    """
    Write the classified data held by this instance to a GeoTIFF file.

    Delegates to ``_write_geotiff`` with the instance's classified data,
    CRS and transform.

    :param output_path: Path where the GeoTIFF file will be saved.
    :return: None
    """
    _write_geotiff(
        self.classified,
        output_path,
        self.crs,
        self.transform,
    )

classify(segments, training_classes, acceptable_classes_gdf=None, method='rf', test_size=0.2, compute_reports=False, compute_shap=False, sample_shap=False, **kwargs)

:param segments: A GeoDataFrame containing the segments to be classified. :param training_classes: A DataFrame containing the training data with 'feature_class' as the target variable. :param acceptable_classes_gdf: A GeoDataFrame of acceptable classes with geometries to mask predictions. Default is None. :param method: The machine learning method to use for classification ('rf' for RandomForest, 'mlp' for MLPClassifier). Default is 'rf'. :param test_size: The proportion of the dataset to include in the test split. Default is 0.2. :param compute_reports: Whether to compute and return classification reports and confusion matrix. Default is False. :param compute_shap: Whether to compute and return SHAP values for feature importance. Default is False. :param sample_shap: Whether to subsample the training data (500 rows) before computing SHAP values when method is 'mlp'. Default is False. :param kwargs: Additional keyword arguments passed to the classifier. :return: An object of ClassifiedImage containing the classified segments, confusion matrix, classification report, SHAP values, and classifier parameters.

Source code in obia/classification/classify.py
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
def _prediction_margin(probabilities):
    """
    Return the gap between the two largest values in ``probabilities``.

    :param probabilities: 1-D array-like of class probabilities.
    :return: Largest minus second-largest value as a float; the single value itself
        when only one probability is given; None when the input is empty.
    """
    probabilities = np.asarray(probabilities, dtype=float)
    if probabilities.size == 0:
        return None
    if probabilities.size == 1:
        # No runner-up exists, so the margin is measured against zero.
        return float(probabilities[0])
    runner_up, best = np.partition(probabilities, -2)[-2:]
    return float(best - runner_up)


def classify(segments, training_classes, acceptable_classes_gdf=None,
             method='rf', test_size=0.2, compute_reports=False,
             compute_shap=False, sample_shap=False, **kwargs):
    """
    Train a classifier on labelled segments and predict a class for every segment.

    The feature columns are everything except 'feature_class', 'geometry' and
    'segment_id'. Features are standardised with a single scaler fitted on the
    training split; the same scaler is applied to the test split and to the
    segments being predicted, so the classifier always sees consistently scaled data.

    ``segments`` is modified in place: 'predicted_class' and 'prediction_margin'
    columns are added, integer columns are cast to nullable Int64 and floating
    columns to float. 'predicted_class' is cast to Int64, so class labels are
    expected to be integer-like.

    :param segments: A GeoDataFrame containing the segments to be classified. Its feature columns
        must be in the same order as those of ``training_classes``.
    :param training_classes: A DataFrame containing the training data with 'feature_class' as the target variable.
    :param acceptable_classes_gdf: A GeoDataFrame of acceptable classes with geometries to mask predictions.
        For a segment intersecting one or more of its geometries, the prediction is restricted to the
        'acceptable_classes' of the first intersecting row. Default is None.
    :param method: The machine learning method to use for classification ('rf' for RandomForest, 'mlp' for MLPClassifier). Default is 'rf'.
    :param test_size: The proportion of the dataset to include in the test split. Default is 0.2.
    :param compute_reports: Whether to compute and return classification reports and confusion matrix. Default is False.
    :param compute_shap: Whether to compute and return SHAP values for feature importance. Default is False.
    :param sample_shap: For method 'mlp' only: whether to subsample the training data to 500 rows and use that
        sample both as the SHAP background and as the data explained. Default is False.
    :param kwargs: Additional keyword arguments passed to the classifier.
    :return: An object of ClassifiedImage containing the classified segments, confusion matrix, classification report,
        SHAP values, and classifier parameters. Its transform and crs are None. Segments whose acceptable classes
        are all unknown to the classifier keep a missing predicted class and margin.
    :raises ValueError: If ``method`` is neither 'rf' nor 'mlp'.
    :raises ImportError: If ``compute_shap`` is True and the 'shap' package is not installed.
    """
    shap_values = None
    x = training_classes.drop(['feature_class', 'geometry', 'segment_id'], axis=1)
    y = training_classes['feature_class']

    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=test_size, random_state=42)

    # One scaler, fitted on the training split only. Re-fitting on the test or
    # prediction data would hand the classifier differently scaled features.
    # Plain arrays keep the column matching positional.
    scaler = StandardScaler()
    x_train = scaler.fit_transform(np.asarray(x_train))
    x_test = scaler.transform(np.asarray(x_test))

    if method == 'rf':
        classifier = RandomForestClassifier(**kwargs)
    elif method == 'mlp':
        classifier = MLPClassifier(**kwargs)
    else:
        raise ValueError('An unsupported classification algorithm was requested')

    classifier.fit(x_train, y_train)
    if compute_shap:
        try:
            import shap
        except ImportError as exc:
            raise ImportError(
                "SHAP support requires the optional 'explain' dependencies. "
                "Install with `pip install obia[explain]`."
            ) from exc

        # Data that is explained; for 'mlp' it also serves as the background set.
        shap_data = x_train
        if isinstance(classifier, MLPClassifier):
            if sample_shap:
                shap_data = shap.sample(x_train, 500, random_state=42)
            explainer = shap.KernelExplainer(classifier.predict_proba, shap_data)
        else:
            explainer = shap.TreeExplainer(classifier)

        shap_values = explainer.shap_values(shap_data)

    report = None
    cm = None
    if compute_reports:
        y_pred = classifier.predict(x_test)
        cm = confusion_matrix(y_test, y_pred)
        report = classification_report(y_test, y_pred)

    x_pred = segments.drop(['feature_class', 'geometry', 'segment_id'], axis=1, errors='ignore')
    x_pred = scaler.transform(np.asarray(x_pred))

    # Initialize predicted classes and prediction margins
    n_segments = x_pred.shape[0]
    y_pred_all = np.full(n_segments, None)
    prediction_margin = np.full(n_segments, None)

    # Predict all segments in one batch instead of one classifier call per row.
    class_index = pd.Index(classifier.classes_)
    proba_all = classifier.predict_proba(x_pred)
    unrestricted_pred = classifier.predict(x_pred)

    # Geometries are only needed (and only required to exist) when masking.
    geometries = segments['geometry'] if acceptable_classes_gdf is not None else None

    # Iterate by position: the result arrays are positional, whereas the
    # GeoDataFrame index labels are not guaranteed to be 0..n-1.
    for pos in range(n_segments):
        acceptable_classes = None

        if acceptable_classes_gdf is not None:
            # Check intersection with acceptable_classes_gdf
            intersections = acceptable_classes_gdf[
                acceptable_classes_gdf.intersects(geometries.iloc[pos])
            ]
            if not intersections.empty:
                # If there are intersections, get the list of acceptable classes
                acceptable_classes = intersections.iloc[0]['acceptable_classes']

        if acceptable_classes is None:
            # Predict the class without filtering
            y_pred_all[pos] = unrestricted_pred[pos]
            prediction_margin[pos] = _prediction_margin(proba_all[pos])
            continue

        # Restrict the candidates to acceptable classes known to the classifier
        allowed = class_index.intersection(acceptable_classes)
        if allowed.empty:
            # Nothing acceptable can be predicted; leave the segment unclassified.
            continue
        candidate_probs = proba_all[pos, class_index.get_indexer(allowed)]
        y_pred_all[pos] = allowed[int(np.argmax(candidate_probs))]
        prediction_margin[pos] = _prediction_margin(candidate_probs)

    params = classifier.get_params()

    segments['predicted_class'] = y_pred_all
    segments['prediction_margin'] = prediction_margin

    for col in segments.columns:
        if col != segments.geometry.name:
            if np.issubdtype(segments[col].dtype, np.integer):
                segments[col] = segments[col].astype(pd.Int64Dtype())
            elif np.issubdtype(segments[col].dtype, np.floating):
                segments[col] = segments[col].astype(float)

    segments['predicted_class'] = segments['predicted_class'].astype(pd.Int64Dtype())
    segments['prediction_margin'] = segments['prediction_margin'].astype(float)

    return ClassifiedImage(segments, cm, report, shap_values, None, None, params)