Ada beberapa model pre-trained yang sering digunakan untuk transfer learning
dalam tugas klasifikasi teks. Berikut beberapa model yang populer:
Implementation
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from keras.callbacks import EarlyStopping
from keras.optimizers import Adam
from keras.utils import to_categorical
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.tokenize import word_tokenize
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.layers import (
    BatchNormalization,
    Conv1D,
    Dense,
    Embedding,
    Flatten,
    LSTM,
    MaxPooling1D,
)
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
• Load Dataset
# Load the dataset from the uploaded CSV file.
import io

# `uploaded` is defined outside this snippet; it is indexed by file name and
# the value is wrapped in BytesIO so pandas can read it like a file.
# NOTE(review): presumably the dict returned by google.colab files.upload()
# (file name -> raw bytes) — confirm.
datasetPath = io.BytesIO(uploaded['cleaned_reviews.csv'])
df = pd.read_csv(datasetPath)

# Replace missing reviews with the placeholder string '0'.
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on the column selection: that chained in-place form is
# deprecated in pandas 2.x and does not modify df under copy-on-write.
df['cleaned_review'] = df['cleaned_review'].fillna('0')
# Draw the values in resampled_sentiment_counts (computed outside this
# snippet) as a bar chart, one colour per bar.
plt.figure(figsize=(8, 6))
resampled_sentiment_counts.plot(kind='bar', color=['red', 'green', 'blue'])
# Axis labels first, then the heading; these calls are independent of each other.
plt.xlabel('Sentiment')
plt.ylabel('Count')
plt.title('Resampled Sentiment Distribution after SMOTE')
plt.xticks(rotation=0)  # keep the tick labels horizontal
plt.show()
# NOTE(review): the comment that headed this snippet said "print each token in
# the sequence and its corresponding word", but the statements below only
# redraw the resampled sentiment bar chart — confirm which snippet was intended.
plt.figure(figsize=(8, 6))
resampled_sentiment_counts.plot(
    kind='bar',
    color=['red', 'green', 'blue'],
)
plt.title('Resampled Sentiment Distribution after SMOTE')
plt.ylabel('Count')
plt.xlabel('Sentiment')
plt.xticks(rotation=0)  # tick labels are left unrotated
plt.show()
# Model whose Embedding layer is initialised from pre-trained GloVe
# embeddings (the embedding_weights matrix is built outside this snippet).
model = Sequential()

# Vocabulary size is len(word_index) + 1 with 100-dimensional vectors.
# trainable=True allows the pre-trained weights to be updated during training.
# NOTE(review): the extra "+ 1" row is presumably for the padding index 0 —
# confirm against how word_index was built.
model.add(Embedding(input_dim=len(word_index) + 1,
                    output_dim=100, weights=[embedding_weights],
                    input_length=max_sequence_length, trainable=True))

# The first LSTM returns the full sequence so the second LSTM can consume it.
model.add(LSTM(64, return_sequences=True, dropout=0.1))
model.add(BatchNormalization())

# The second LSTM returns only its last output.
model.add(LSTM(32, return_sequences=False, dropout=0.1))
model.add(BatchNormalization())

# Softmax output layer with 3 units.
model.add(Dense(3, activation='softmax'))
B. Melakukan Label Encoder
Tahap ini penting dilakukan untuk mengubah nilai-nilai kategori
menjadi angka atau label. Terdapat 3 (tiga) fitur yang akan diubah,
yaitu fitur 'text', fitur 'actual label', dan fitur 'predicted label'.
# Start with an empty list that will hold the collected data.
data: list = []
# Plot the training curves side by side: accuracy on the left, loss on the right.
# history, epochs_range, acc and val_acc are defined outside this snippet.
loss = history.history['loss']
val_loss = history.history['val_loss']

plt.figure(figsize=(14, 5))

# Left panel (1 row, 2 columns, position 1): training vs. validation accuracy.
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
# The legend label is a single string literal kept on one line.
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Right panel (position 2): training vs. validation loss.
plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')

plt.show()
• Lakukan prediksi pada data test hasil splitting, kemudian bandingkan
true label dengan predicted label-nya
# Get predictions: run the model on the x_train, x_val and x_test inputs
# (model and the three input sets are defined outside this snippet).
y_train_pred = model.predict(x_train)
y_val_pred = model.predict(x_val)
import sklearn
y_test_pred = model.predict(x_test)
# Hard-coded label lists of ten entries each; they differ only at index 7
# (2 in y_true, 1 in y_pred).
# NOTE(review): these lists are not derived from the predictions above —
# presumably a toy example for the comparison step; confirm.
y_true = [1, 0, 1, 2, 1, 0, 1, 2, 2, 0]
y_pred = [1, 0, 1, 2, 1, 0, 1, 1, 2, 0]