Intro
SmartCLIDE provides an environment that supports the development of service-oriented software. This service classification component classifies web services based on their functionality, which can be helpful in later stages such as service composition.
Requirements
The third-party libraries are listed in the requirements.txt file; the main requirements are:
- Python 3.7+
- HuggingFace Transformers
- System requirements: CPU: 2 vCPUs, RAM: 8 GB
Usage
The trained models have been packaged using the Python Setuptools library. This package is available in this GitHub repository.
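For orientation, a Setuptools package of this kind is described by a setup.py along the lines of the minimal sketch below; the package name, version, and dependency list shown here are placeholders, and the actual metadata is defined in the repository.
from setuptools import setup, find_packages

# Minimal illustrative setup script; the real metadata lives in the repository
setup(
    name="service-classification",    # placeholder name
    version="0.1.0",                  # placeholder version
    packages=find_packages(),
    install_requires=["transformers", "torch", "pandas"],
    python_requires=">=3.7",
)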
Moreover, the class below demonstrates how to use the published service classification model directly.
#!/usr/bin/python3
# Eclipse Public License 2.0
import pandas as pd


class PredictServiceClass:
    TRAINED_MODEL = "zakieh/serv_classification"

    def __init__(self):
        self.df = None
        self.classifier_model = None
        self.tokenizer_class = None
        self.classifier_config = None
        self.max_desc_len_public = 150

    def loadTrainedClassifier(self):
        """
        Load the trained web service classifier from the HuggingFace Hub.
        :return: trained model object
        """
        from transformers import BertForSequenceClassification, BertTokenizer, BertConfig
        model_hub = self.TRAINED_MODEL
        self.classifier_model = BertForSequenceClassification.from_pretrained(model_hub, force_download=True)
        self.tokenizer_class = BertTokenizer.from_pretrained(model_hub, force_download=True)
        self.classifier_config = BertConfig.from_pretrained(model_hub, force_download=True)
        return self.classifier_model

    def get_prediction(self, text):
        """
        Predict the service class of the input text with the trained DL model.
        :return: list of [class label, confidence in percent] pairs for the top-k classes
        """
        import torch
        k = 2
        # Prepare the text as a tokenized, padded and truncated tensor sequence
        inputs = self.tokenizer_class(text, padding=True, truncation=True, max_length=100, return_tensors="pt")
        # Perform inference and turn the logits into class probabilities
        outputs = self.classifier_model(**inputs)
        probs = outputs[0].softmax(1)
        # Select the k most probable classes
        top_k = torch.topk(probs.flatten(), k)
        # Map class ids to labels using the model config
        id2label = self.classifier_config.id2label
        results = []
        for class_id, prob in zip(top_k.indices, top_k.values):
            label = id2label[class_id.item()]
            results.append([label, prob.item() * 100])
        return results

    def classify_service_data(self, df, clm_name):
        """Classify every entry of the given column and store the results in a new 'class_percent' column."""
        if df is not None:
            self.df = df
            if clm_name in self.df:
                self.df['class_percent'] = self.df[clm_name].astype(str).apply(self.get_prediction)
        return self.df
Using the class
# Loading the model is slow, so it is recommended to run it in the background
obj = PredictServiceClass()
obj.loadTrainedClassifier()

# Use the model
service_description = "The TransLoc OpenAPI is a public RESTful API which allows developers to access real-time vehicle tracking information and incorporate this data into their website or mobile application."
service_classes = obj.get_prediction(service_description)
print(service_classes)
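The same object can also classify a whole table of descriptions through classify_service_data, which adds a class_percent column with the prediction for each row. The sketch below is illustrative: the example DataFrame and its description column name are assumptions, not part of the package.
import pandas as pd

# Example data (assumed); any DataFrame with a text column can be used
services_df = pd.DataFrame({
    "description": [
        "Provides current weather conditions and forecasts for a given city.",
        "Processes credit card payments and refunds for online stores.",
    ]
})

# Each row of 'class_percent' holds the top-2 [label, confidence] pairs
classified_df = obj.classify_service_data(services_df, "description")
print(classified_df[["description", "class_percent"]])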