Sentiment Analysis at Scale¶
Fine-tuning BERT/DistilBERT for sentiment, aspect-based analysis, and multi-class classification.
# Install dependencies
# !pip install transformers torch numpy
Sentiment Analysis with Transformers¶
# Sentiment analysis with transformers (requires transformers)
'''
from transformers import pipeline

# Binary sentiment
sentiment_pipeline = pipeline("sentiment-analysis")
result = sentiment_pipeline("I love this product!")
print(result)  # [{'label': 'POSITIVE', 'score': 0.9998}]

# Fine-grained sentiment (5-class)
sentiment_5class = pipeline(
    "sentiment-analysis",
    model="nlptown/bert-base-multilingual-uncased-sentiment"
)
result = sentiment_5class("The product is okay but could be better.")
print(result)  # [{'label': '3 stars', 'score': 0.75}]

# Emotion detection
emotion_pipeline = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base"
)
result = emotion_pipeline("I'm so excited about the news!")
print(result)  # [{'label': 'joy', 'score': 0.95}]

# Aspect-based sentiment
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

tokenizer = AutoTokenizer.from_pretrained("yangheng/deberta-v3-base-absa-v1.1")
model = AutoModelForSequenceClassification.from_pretrained("yangheng/deberta-v3-base-absa-v1.1")

text = "The food was great but the service was slow."
aspect = "food"
inputs = tokenizer(f"{text} [SEP] {aspect}", return_tensors="pt")
outputs = model(**inputs)
prediction = torch.argmax(outputs.logits, dim=1).item()
# 0: negative, 1: neutral, 2: positive
'''
print("Popular Sentiment Models:\n")
print("Binary/Multi-class:")
print("  • distilbert-base-uncased-finetuned-sst-2-english (binary)")
print("  • cardiffnlp/twitter-roberta-base-sentiment (3-class)")
print("  • nlptown/bert-base-multilingual-uncased-sentiment (5-star)")
print("\nEmotion Detection:")
print("  • j-hartmann/emotion-english-distilroberta-base (7 emotions)")
print("  • bhadresh-savani/distilbert-base-uncased-emotion (6 emotions)")
print("\nAspect-based:")
print("  • yangheng/deberta-v3-base-absa-v1.1")
print("  • ABSA models for restaurant/laptop reviews")
Simple Sentiment Classifier¶
from typing import Dict, List
from dataclasses import dataclass

@dataclass
class SentimentResult:
    """Sentiment analysis result"""
    text: str
    sentiment: str  # 'positive', 'negative', 'neutral'
    score: float  # 0.0 (negative) to 1.0 (positive)
    confidence: float

class SimpleSentimentAnalyzer:
    """Rule-based sentiment analyzer (demo only)"""

    def __init__(self):
        # Simple lexicons
        self.positive_words = {
            'excellent', 'amazing', 'great', 'wonderful', 'fantastic',
            'good', 'best', 'perfect', 'love', 'awesome', 'beautiful',
            'brilliant', 'outstanding', 'superior', 'terrific'
        }
        self.negative_words = {
            'terrible', 'awful', 'horrible', 'bad', 'worst', 'poor',
            'disappointing', 'useless', 'waste', 'hate', 'disgusting',
            'pathetic', 'inferior', 'mediocre', 'disaster'
        }
        self.negation_words = {'not', 'no', 'never', "n't", 'neither', 'nor'}

    def analyze(self, text: str) -> SentimentResult:
        """Analyze sentiment of text"""
        # Strip trailing punctuation so "excellent!" matches the lexicon
        words = [w.strip('.,!?;:') for w in text.lower().split()]
        pos_count = 0
        neg_count = 0
        for i, word in enumerate(words):
            # A sentiment word is negated if the previous token is a negator
            negated = i > 0 and words[i - 1] in self.negation_words
            # Count sentiment words, flipping polarity under negation
            if word in self.positive_words:
                if negated:
                    neg_count += 1
                else:
                    pos_count += 1
            elif word in self.negative_words:
                if negated:
                    pos_count += 1
                else:
                    neg_count += 1
        # Calculate score
        total = pos_count + neg_count
        if total == 0:
            sentiment = 'neutral'
            score = 0.5
            confidence = 0.5
        else:
            score = pos_count / total
            confidence = abs(pos_count - neg_count) / total
            if score > 0.6:
                sentiment = 'positive'
            elif score < 0.4:
                sentiment = 'negative'
            else:
                sentiment = 'neutral'
        return SentimentResult(
            text=text,
            sentiment=sentiment,
            score=score,
            confidence=confidence
        )

    def batch_analyze(self, texts: List[str]) -> List[SentimentResult]:
        """Analyze multiple texts"""
        return [self.analyze(text) for text in texts]
# Test sentiment analyzer
analyzer = SimpleSentimentAnalyzer()

test_texts = [
    "This product is excellent! I love it!",
    "Terrible quality, very disappointed.",
    "It's okay, nothing special.",
    "Not bad, but not great either.",
    "The service was not good at all."
]

print("Sentiment Analysis Results:\n")
for text in test_texts:
    result = analyzer.analyze(text)
    print(f"Text: {text}")
    print(f"  Sentiment: {result.sentiment.upper()}")
    print(f"  Score: {result.score:.2f}")
    print(f"  Confidence: {result.confidence:.2f}")
    print()
Fine-grained Sentiment (Star Ratings)¶
@dataclass
class RatingResult:
    """Star rating result"""
    text: str
    rating: int  # 1-5 stars
    confidence: float

class StarRatingClassifier:
    """Simple 5-star rating classifier"""

    def __init__(self):
        # Intensity modifiers
        self.strong_positive = {'excellent', 'amazing', 'perfect', 'outstanding', 'love'}
        self.positive = {'good', 'great', 'nice', 'fine', 'ok', 'okay'}
        self.strong_negative = {'terrible', 'awful', 'horrible', 'worst', 'hate'}
        self.negative = {'bad', 'poor', 'disappointing', 'mediocre'}

    def predict_rating(self, text: str) -> RatingResult:
        """Predict star rating (1-5)"""
        # Strip trailing punctuation so "perfect!" matches the lexicon
        words = {w.strip('.,!?;:') for w in text.lower().split()}
        # Count different sentiment levels
        strong_pos = len(words & self.strong_positive)
        pos = len(words & self.positive)
        strong_neg = len(words & self.strong_negative)
        neg = len(words & self.negative)
        # Calculate score: strong words count double
        score = (strong_pos * 2 + pos) - (strong_neg * 2 + neg)
        # Map to 1-5 stars
        if score >= 3:
            rating = 5
            confidence = 0.9
        elif score >= 1:
            rating = 4
            confidence = 0.8
        elif score >= -1:
            rating = 3
            confidence = 0.6
        elif score >= -3:
            rating = 2
            confidence = 0.7
        else:
            rating = 1
            confidence = 0.9
        return RatingResult(
            text=text,
            rating=rating,
            confidence=confidence
        )
# Test star rating
rating_classifier = StarRatingClassifier()

reviews = [
    "Absolutely perfect! Love it!",
    "Pretty good overall.",
    "It's okay, nothing special.",
    "Disappointing and poor quality.",
    "Terrible! Worst purchase ever!"
]

print("Star Rating Predictions:\n")
for review in reviews:
    result = rating_classifier.predict_rating(review)
    stars = '★' * result.rating + '☆' * (5 - result.rating)
    print(f"{stars} ({result.rating}/5) - {review}")
    print(f"  Confidence: {result.confidence:.1%}\n")
Aspect-based Sentiment Analysis¶
from typing import Optional
import re

@dataclass
class AspectSentiment:
    """Sentiment for a specific aspect"""
    aspect: str
    sentiment: str  # 'positive', 'negative', 'neutral'
    score: float
    text_snippet: Optional[str] = None

class AspectBasedSentimentAnalyzer:
    """Simple aspect-based sentiment analysis"""

    def __init__(self):
        # Common aspects for restaurant reviews
        self.aspects = {
            'food': ['food', 'meal', 'dish', 'cuisine', 'taste'],
            'service': ['service', 'staff', 'waiter', 'server', 'waitress'],
            'ambiance': ['atmosphere', 'ambiance', 'decor', 'environment'],
            'price': ['price', 'cost', 'value', 'expensive', 'cheap']
        }
        self.positive_words = {
            'great', 'excellent', 'good', 'amazing', 'perfect',
            'delicious', 'wonderful', 'fantastic', 'friendly'
        }
        self.negative_words = {
            'bad', 'terrible', 'poor', 'awful', 'horrible',
            'slow', 'rude', 'expensive', 'disappointing'
        }

    def extract_aspect_sentiments(self, text: str) -> List[AspectSentiment]:
        """Extract sentiment for each aspect"""
        results = []
        sentences = re.split(r'[.!?,]', text.lower())
        for aspect, keywords in self.aspects.items():
            # Find sentences mentioning this aspect
            relevant_sentences = [
                sent for sent in sentences
                if any(keyword in sent for keyword in keywords)
            ]
            if not relevant_sentences:
                continue
            # Analyze sentiment in relevant sentences
            pos_count = 0
            neg_count = 0
            for sent in relevant_sentences:
                words = sent.split()
                pos_count += sum(1 for w in words if w in self.positive_words)
                neg_count += sum(1 for w in words if w in self.negative_words)
            # Determine sentiment
            total = pos_count + neg_count
            if total == 0:
                sentiment = 'neutral'
                score = 0.5
            else:
                score = pos_count / total
                if score > 0.6:
                    sentiment = 'positive'
                elif score < 0.4:
                    sentiment = 'negative'
                else:
                    sentiment = 'neutral'
            results.append(AspectSentiment(
                aspect=aspect,
                sentiment=sentiment,
                score=score,
                text_snippet=relevant_sentences[0].strip() if relevant_sentences else None
            ))
        return results
# Test aspect-based sentiment
absa = AspectBasedSentimentAnalyzer()

reviews = [
    "The food was excellent but the service was terrible.",
    "Great atmosphere and friendly staff. The meal was delicious!",
    "Good food but way too expensive for the portion size."
]

print("Aspect-based Sentiment Analysis:\n")
for review in reviews:
    print(f"Review: {review}")
    aspects = absa.extract_aspect_sentiments(review)
    if aspects:
        for asp in aspects:
            emoji = '😊' if asp.sentiment == 'positive' else '😞' if asp.sentiment == 'negative' else '😐'
            print(f"  {emoji} {asp.aspect.capitalize()}: {asp.sentiment} (score: {asp.score:.2f})")
    else:
        print("  No aspects detected")
    print()
Production Sentiment System¶
from collections import defaultdict, deque
import hashlib

class ProductionSentimentAnalyzer:
    """Production-ready sentiment analysis system"""

    def __init__(self):
        self.analyzer = SimpleSentimentAnalyzer()
        self.cache = {}
        self.stats = {
            "total_analyses": 0,
            "cache_hits": 0,
            "sentiment_distribution": defaultdict(int),
            "avg_confidence": 0.0,
            "confidences": deque(maxlen=1000),
            "low_confidence_count": 0
        }

    def _get_cache_key(self, text: str) -> str:
        """Generate cache key"""
        return hashlib.md5(text.encode()).hexdigest()

    def analyze(
        self,
        text: str,
        use_cache: bool = True,
        confidence_threshold: float = 0.5
    ) -> SentimentResult:
        """Analyze sentiment with caching and monitoring"""
        # Check cache
        cache_key = self._get_cache_key(text)
        if use_cache and cache_key in self.cache:
            self.stats["cache_hits"] += 1
            self.stats["total_analyses"] += 1
            return self.cache[cache_key]
        # Analyze
        result = self.analyzer.analyze(text)
        # Cache result
        if use_cache:
            self.cache[cache_key] = result
        # Update stats
        self.stats["total_analyses"] += 1
        self.stats["sentiment_distribution"][result.sentiment] += 1
        self.stats["confidences"].append(result.confidence)
        self.stats["avg_confidence"] = sum(self.stats["confidences"]) / len(self.stats["confidences"])
        if result.confidence < confidence_threshold:
            self.stats["low_confidence_count"] += 1
        return result

    def batch_analyze(self, texts: List[str]) -> List[SentimentResult]:
        """Analyze multiple texts"""
        return [self.analyze(text) for text in texts]

    def get_sentiment_summary(self, texts: List[str]) -> Dict:
        """Get aggregate sentiment summary"""
        results = self.batch_analyze(texts)
        sentiments = [r.sentiment for r in results]
        scores = [r.score for r in results]
        return {
            "total_texts": len(texts),
            "positive_count": sentiments.count('positive'),
            "negative_count": sentiments.count('negative'),
            "neutral_count": sentiments.count('neutral'),
            "positive_percentage": sentiments.count('positive') / len(texts) * 100,
            "avg_sentiment_score": sum(scores) / len(scores)
        }

    def get_stats(self) -> Dict:
        """Get analysis statistics"""
        total = max(self.stats["total_analyses"], 1)
        return {
            "total_analyses": self.stats["total_analyses"],
            "cache_hits": self.stats["cache_hits"],
            "cache_hit_rate": self.stats["cache_hits"] / total,
            "sentiment_distribution": dict(self.stats["sentiment_distribution"]),
            "avg_confidence": self.stats["avg_confidence"],
            "low_confidence_count": self.stats["low_confidence_count"],
            "low_confidence_rate": self.stats["low_confidence_count"] / total
        }
# Test production system
prod_analyzer = ProductionSentimentAnalyzer()

customer_feedback = [
    "Excellent service! Very happy with my purchase.",
    "Product arrived damaged and support was unhelpful.",
    "It's okay, does what it says.",
    "Amazing quality, would buy again!",
    "Not worth the price, very disappointed.",
    "Good product but shipping took too long."
]

print("Customer Feedback Analysis:\n")
for feedback in customer_feedback:
    result = prod_analyzer.analyze(feedback)
    emoji = '😊' if result.sentiment == 'positive' else '😞' if result.sentiment == 'negative' else '😐'
    print(f"{emoji} {result.sentiment.upper()} (score: {result.score:.2f}): {feedback}")

# Get summary
summary = prod_analyzer.get_sentiment_summary(customer_feedback)
print("\nSentiment Summary:")
print(f"  Total: {summary['total_texts']}")
print(f"  Positive: {summary['positive_count']} ({summary['positive_percentage']:.1f}%)")
print(f"  Negative: {summary['negative_count']}")
print(f"  Neutral: {summary['neutral_count']}")
print(f"  Avg sentiment: {summary['avg_sentiment_score']:.2f}")

# Get stats
stats = prod_analyzer.get_stats()
print("\nSystem Statistics:")
print(f"  Total analyses: {stats['total_analyses']}")
print(f"  Avg confidence: {stats['avg_confidence']:.2f}")
print(f"  Low confidence rate: {stats['low_confidence_rate']:.1%}")
Best Practices¶
1. Model Selection¶
Binary: DistilBERT-SST-2 (fast, accurate)
Multi-class: RoBERTa-base-sentiment (3-class)
Fine-grained: BERT-multilingual-sentiment (5-star)
Domain-specific: Fine-tune on your domain (finance, healthcare)
Multilingual: XLM-RoBERTa for non-English text
2. Data Preparation¶
Handle emojis and emoticons (convert or keep)
Normalize text (URLs, mentions, hashtags)
Balance classes during training
Augment with back-translation or paraphrasing
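The normalization step above can be sketched with plain regexes. The placeholder tokens (`<url>`, `<user>`) are a common convention for tweet models but are an assumption here, not a fixed standard:

```python
import re

def normalize_text(text: str) -> str:
    """Normalize social-media text before training or inference.

    Replaces URLs and @-mentions with placeholder tokens (an assumed
    convention), keeps the hashtag word while dropping '#', and
    collapses whitespace. Emojis/emoticons are left intact.
    """
    text = re.sub(r'https?://\S+', '<url>', text)   # URLs -> placeholder
    text = re.sub(r'@\w+', '<user>', text)          # mentions -> placeholder
    text = re.sub(r'#(\w+)', r'\1', text)           # '#happy' -> 'happy'
    text = re.sub(r'\s+', ' ', text).strip()        # collapse whitespace
    return text

print(normalize_text("Loving it! @acme check https://x.co #happy  :)"))
```

Whether to map URLs/mentions to placeholders or drop them entirely depends on the model: models pretrained on tweets often expect specific placeholder tokens, so check the model card.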
3. Training Tips¶
Use SST-2, IMDb, or Yelp datasets
Fine-tune pretrained models (BERT, RoBERTa)
Monitor per-class F1 scores
Handle class imbalance (weighted loss, oversampling)
Use ensemble models for production
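For the class-imbalance tip, inverse-frequency class weights can be computed without any framework; the formula below (n_total / (n_classes * n_c)) mirrors scikit-learn's "balanced" heuristic and is one common choice, not the only one:

```python
from collections import Counter
from typing import Dict, List

def class_weights(labels: List[str]) -> Dict[str, float]:
    """Inverse-frequency weights: weight_c = n_total / (n_classes * n_c).

    Rare classes get proportionally larger weights, so a weighted loss
    penalizes mistakes on them more heavily.
    """
    counts = Counter(labels)
    n_total, n_classes = len(labels), len(counts)
    return {c: n_total / (n_classes * n) for c, n in counts.items()}

# Heavily imbalanced toy label set
labels = ['positive'] * 80 + ['negative'] * 15 + ['neutral'] * 5
print(class_weights(labels))
```

The resulting weights can be passed, for example, to a weighted cross-entropy loss (in PyTorch, the `weight` argument of `CrossEntropyLoss`), after ordering them by class index.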
4. Production Optimization¶
Cache predictions for identical texts
Batch process for efficiency
Set confidence thresholds for manual review
Monitor sentiment distribution over time
A/B test model updates
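The "monitor sentiment distribution over time" point can be sketched as a window-to-window comparison. The total-variation distance and the 0.2 alert threshold below are illustrative choices, not established defaults:

```python
from collections import Counter
from typing import Dict, List

def distribution(labels: List[str]) -> Dict[str, float]:
    """Relative frequency of each sentiment label."""
    total = len(labels)
    return {k: v / total for k, v in Counter(labels).items()}

def drift_score(baseline: List[str], current: List[str]) -> float:
    """Total-variation distance between two label distributions.

    0.0 means identical distributions, 1.0 means completely disjoint.
    """
    p, q = distribution(baseline), distribution(current)
    keys = set(p) | set(q)
    return 0.5 * sum(abs(p.get(k, 0.0) - q.get(k, 0.0)) for k in keys)

last_week = ['positive'] * 70 + ['negative'] * 20 + ['neutral'] * 10
this_week = ['positive'] * 40 + ['negative'] * 50 + ['neutral'] * 10
score = drift_score(last_week, this_week)
if score > 0.2:  # illustrative threshold, tune per application
    print(f"Sentiment drift detected: {score:.2f}")
```

A sustained spike in this score can mean either a real shift in customer mood or model degradation after a data/domain change; both are worth a manual review.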
Common Challenges¶
Sarcasm: "Oh great, another broken product" (positive words, negative intent)
Context: "This phone is sick!" (slang positive)
Neutrality: "The product arrived on time." (factual, not sentiment)
Mixed sentiment: "Great quality but terrible price."
Domain shift: Finance sentiment ≠ restaurant sentiment
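For mixed sentiment specifically, one common workaround is to split on contrast conjunctions and score each clause separately, rather than forcing a single label. The tiny lexicons below are placeholders for illustration:

```python
import re
from typing import List, Tuple

# Placeholder lexicons for the sketch; a real system would use full ones
POS = {'great', 'good', 'excellent'}
NEG = {'terrible', 'bad', 'awful'}

def clause_sentiments(text: str) -> List[Tuple[str, str]]:
    """Split on contrast markers ('but', 'however') and label each clause."""
    clauses = re.split(r'\b(?:but|however)\b', text.lower())
    results = []
    for clause in clauses:
        words = set(re.findall(r'[a-z]+', clause))
        pos, neg = len(words & POS), len(words & NEG)
        label = 'positive' if pos > neg else 'negative' if neg > pos else 'neutral'
        results.append((clause.strip(' ,.'), label))
    return results

print(clause_sentiments("Great quality but terrible price."))
```

This turns one ambiguous document-level label into two clause-level ones, which aligns naturally with the aspect-based approach earlier in the notebook.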
Evaluation Metrics¶
Accuracy: Overall correctness
Precision/Recall/F1: Per-class performance
Macro F1: Average F1 across classes (for imbalanced data)
Confusion matrix: Identify systematic errors
Error analysis: Manual review of mistakes
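Macro F1 from the list above can be computed directly from predictions; this dependency-free sketch is equivalent to scikit-learn's `f1_score(average='macro')` (with the usual convention that an undefined precision/recall counts as 0):

```python
from typing import List

def macro_f1(y_true: List[str], y_pred: List[str]) -> float:
    """Unweighted mean of per-class F1 scores.

    Each class contributes equally regardless of its frequency, which is
    why macro F1 is preferred over accuracy on imbalanced data.
    """
    classes = sorted(set(y_true) | set(y_pred))
    f1s = []
    for c in classes:
        tp = sum(1 for t, p in zip(y_true, y_pred) if t == c and p == c)
        fp = sum(1 for t, p in zip(y_true, y_pred) if t != c and p == c)
        fn = sum(1 for t, p in zip(y_true, y_pred) if t == c and p != c)
        precision = tp / (tp + fp) if tp + fp else 0.0
        recall = tp / (tp + fn) if tp + fn else 0.0
        f1s.append(2 * precision * recall / (precision + recall)
                   if precision + recall else 0.0)
    return sum(f1s) / len(f1s)

y_true = ['pos', 'pos', 'neg', 'neu', 'neg']
y_pred = ['pos', 'neg', 'neg', 'neu', 'neg']
print(f"Macro F1: {macro_f1(y_true, y_pred):.3f}")
```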
Key Takeaways¶
✓ Fine-tuned BERT/RoBERTa achieves 90%+ accuracy
✓ Aspect-based sentiment provides deeper insights
✓ Sarcasm and context are major challenges
✓ Domain-specific fine-tuning significantly improves results
✓ Monitor confidence scores for quality control
✓ Aggregate metrics reveal trends over time
Next: 05_information_extraction.ipynb - Information Extraction