Skip to content

Unsupervised Learning Example

from anoteai import Anote
from time import sleep
import pandas as pd

# Placeholder credential -- substitute your own Anote API key before running.
api_key = 'INSERT_API_KEY_HERE'

# Client object through which every SDK call below is made.
anote = Anote(api_key)

# PDF documents passed to the training call.
train_files = [
    "./example_data/unsupervised_data/UpreachPitchDeck.pdf",
    "./example_data/unsupervised_data/PrivateChatbotDeck.pdf",
    "./example_data/unsupervised_data/PitchDeck_Anote.pdf",
    "./example_data/unsupervised_data/AnoteDifferentiators.pdf",
]

# NOTE(review): NLPTask and ModelType are not brought into scope by the
# imports visible at the top of this example -- confirm which module exports
# them and import it, otherwise the lines below raise NameError.
train_request = {
    "task_type": NLPTask.UNSUPERVISED,
    "model_type": ModelType.RAG_UNSUPERVISED,
    "dataset_name": "UNSUPERVISED_LLM_RAG_TRAINING",
    "document_files": train_files,
}

# Start training the unsupervised model on the files listed above.
train_response = anote.train(**train_request)

# Pull out the identifiers that the later steps of the example refer to.
first_model = train_response["models"][0]
modelId = first_model["id"]
datasetId = train_response["datasetId"]
print(f"Trained model ID: {modelId}")
print(f"Dataset ID: {datasetId}")

def _wait_until_complete(**status_kwargs):
    """Poll ``anote.checkStatus`` until its response reports completion.

    Args:
        **status_kwargs: Keyword arguments forwarded unchanged to
            ``anote.checkStatus`` (this script passes either ``model_id``
            or ``predict_report_id``).

    Returns:
        The first status response whose ``"isComplete"`` entry is truthy.

    Raises:
        KeyError: If a status response has no ``"isComplete"`` entry.
    """
    while True:
        status_response = anote.checkStatus(**status_kwargs)
        if status_response["isComplete"]:
            return status_response
        # Not finished yet: pause for 3 seconds before asking again.
        sleep(3)


# Wait for training to finish before using the model
train_status_response = _wait_until_complete(model_id=modelId)
print("trained model complete...")

# Making predictions on the test dataset with the document file
test_files = [
    "./example_data/unsupervised_data/DataLabeler_PitchDeck.pdf",
    "./example_data/unsupervised_data/AI_Talk.pdf"
]

predict_all_response = anote.predictAll(
    model_id=modelId,
    model_types=[],
    dataset_id=datasetId,
    report_name="Unsupervised Report",
    input_text_col_index=0,
    actual_label_col_index=None,  # No labels needed for unsupervised learning
    document_files=test_files  # Path to the testing document files
)

print("Predictions:", predict_all_response)
predictReportId = predict_all_response["predictReportId"]

# Wait for the prediction report to finish before viewing it
preds_status_response = _wait_until_complete(
    predict_report_id=predictReportId,
)

# Retrieve page 1 of the prediction report, with no search query applied.
view_request = {
    "predict_report_id": predictReportId,
    "dataset_id": datasetId,
    "search_query": None,
    "page_number": 1,
}
predictions = anote.viewPredictions(**view_request)
print("Predictions: ", predictions)

# Ask the trained model one ad-hoc question; no extra documents are passed.
question = "What is anote about"
single_prediction = anote.predict(
    model_id=modelId,
    text=question,
    document_files=None,
)

print("Single Prediction:", single_prediction)

Running the script produces output similar to the following (abridged):

Trained model ID: 12345
Dataset ID: 67890
Predictions: {'predictions': ['Private Chatbot is a fine tuned, secure chatbot for your data.', 'Upreach is an AI sales automation tool'], 'predictReportId': 'report_123456789'}
Single Prediction: {'prediction': 'Anote is an innovative AI platform that enables efficient document labeling, training, and inference.'}