In [14]:
from request import y_true
from utils import Preprocess, MissingValue
import pickle
from fastapi import FastAPI, Request
import pandas as pd
import numpy as np
import onnx
import skl2onnx
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
import os
import onnxruntime as ort
from onnxruntime_tools import optimizer  # Optimization package





In [11]:
with open(f'model/columns.pkl', 'rb') as f:
    cols = pickle.load(f)

with open(f'model/scaler.pkl', 'rb') as f:
    sc = pickle.load(f)

with open(f'model/model.pkl', 'rb') as f:
    model = pickle.load(f)

In [12]:
initial_type = [('float_input', FloatTensorType([None, len(cols)]))]

# Convert to ONNX format
onnx_model = convert_sklearn(model, initial_types=initial_type)

# Save the ONNX model
onnx_model_path = 'model/random_forest_model.onnx'
onnx.save_model(onnx_model, onnx_model_path)
print(f"ONNX model saved at: {onnx_model_path}")

ONNX model saved at: random_forest_model.onnx


In [17]:
optimized_model_path = 'model/optimized_random_forest_model.onnx'

# Check if the model file exists
if os.path.exists(onnx_model_path):
    # Optimize the model by passing the file path to the optimizer
    optimized_model = optimizer.optimize_model(onnx_model_path)

    # Save the optimized model
    optimized_model.save_model_to_file(optimized_model_path)
    print(f"Optimized ONNX model saved at: {optimized_model_path}")

else:
    print(f"ONNX model file not found at: {onnx_model_path}")

Model producer not matched: Expect pytorch,  Got skl2onnx 1.16.0. Please specify correct --model_type parameter.


Stopping at incomplete shape inference at TreeEnsembleClassifier: TreeEnsembleClassifier
node inputs:
name: "float_input"
type {
  tensor_type {
    elem_type: 1
    shape {
      dim {
        dim_param: "float_input_d0"
      }
      dim {
        dim_value: 38
      }
    }
  }
}

node outputs:
name: "label"
type {
  tensor_type {
    elem_type: 7
  }
}

name: "probabilities"
type {
  tensor_type {
    elem_type: 0
  }
}

Optimized ONNX model saved at: optimized_random_forest_model.onnx


In [18]:
df = pd.read_excel('dataset.xlsx',engine='openpyxl')
df.drop(['Unnamed: 0', 'visit id'], axis=1, inplace=True)

In [20]:

preprocessor = Preprocess(
    dataframe=df,
    missing_value_per=0,
    variance_threshold=0,
    min_null_per=0
)
test = preprocessor._mapping(df)

m = MissingValue(test)
test = m.fill_dataframe()
y_true = test['target label / yes no']
x_test = test[cols]
x_test = sc.transform(x_test)

# y_pred = model.predict(x_test)

In [24]:
x_test = np.array(x_test, dtype=np.float32)

In [25]:
session = ort.InferenceSession(optimized_model_path)

# Run inference
input_name = session.get_inputs()[0].name
output_name = session.get_outputs()[0].name

# Make predictions
predictions = session.run([output_name], {input_name: x_test})

In [33]:
type(predictions[0])

numpy.ndarray

In [35]:
type(y_true.values)

numpy.ndarray

In [37]:
from sklearn.metrics import f1_score
f1_score(predictions[0].astype(int), y_true.values.astype(int), average='weighted')

0.8934725519012681

In [None]:



app = FastAPI()

@app.post("/predict/")
async def predict(request: Request):
    try:
        data = await request.json()

        df = pd.read_json(data, orient='records')

        preprocessor = Preprocess(
            dataframe=df,
            missing_value_per=0,
            variance_threshold=0,
            min_null_per=0
        )
        test = preprocessor._mapping(df)

        m = MissingValue(test)
        test = m.fill_dataframe()

        x_test = test[cols]
        x_test = sc.transform(x_test)

        y_pred = model.predict(x_test)

        return {"predictions": y_pred.tolist()}

    except Exception as e:
        return {"error": str(e)}

# Run the application using: uvicorn main:app --reload
