@junhua
Last active October 8, 2025 12:19
ICT3113-OPT-RAG-EVAL
[
{
"q": "What is NLP and what are its main goals?",
"answer": "NLP (Natural Language Processing) enables machines to understand, interpret, and generate human language, for tasks like translation, summarization, and sentiment analysis.",
"must": ["NLP", "human language", "understand"]
},
{
"q": "Describe the traditional NLP pipeline stages.",
"answer": "Stages include text preprocessing (tokenization, cleaning), feature extraction, modeling, evaluation and postprocessing.",
"must": ["preprocessing", "feature extraction", "modeling"]
},
{
"q": "How did rule-based approaches dominate early NLP methods?",
"answer": "They relied on handcrafted linguistic rules, grammars, and lexicons, often brittle and labor-intensive.",
"must": ["rule-based", "lexicon", "grammar"]
},
{
"q": "Why did statistical and machine learning methods become dominant in NLP?",
"answer": "They enable models to learn from data, handle large corpora, and generalize better than rigid rules.",
"must": ["statistical", "learning", "generalize"]
},
{
"q": "What is the paradigm shift introduced by modern LLMs?",
"answer": "From task-specific models to general-purpose LLMs that can adapt via prompts/fine-tuning across many tasks.",
"must": ["task-specific", "general-purpose", "prompt"]
},
{
"q": "Define zero-shot learning in the context of NLP.",
"answer": "Zero-shot is when a model performs a task it was not explicitly trained on, using only its pretrained knowledge and prompt instruction.",
"must": ["zero-shot", "pretrained", "prompt"]
},
{
"q": "What is few-shot prompting and how is it used?",
"answer": "Few-shot includes giving a few examples in the prompt to guide the model toward the desired behavior on a new task.",
"must": ["few-shot", "prompt", "examples"]
},
{
"q": "List some application areas of NLP today.",
"answer": "Applications include machine translation, question answering, summarization, sentiment analysis, chatbots, information extraction.",
"must": ["machine translation", "question answering", "summarization"]
},
{
"q": "What are named entity recognition (NER) and part-of-speech (POS) tagging?",
"answer": "NER identifies and classifies named entities (persons, locations); POS tagging labels words with grammatical categories (noun, verb, etc.).",
"must": ["named entity recognition", "part-of-speech", "classification"]
},
{
"q": "How does the rise of deep learning change NLP capabilities?",
"answer": "Deep learning allows end-to-end models, capturing nonlinear semantic patterns and contextual embeddings, improving many tasks.",
"must": ["deep learning", "embeddings", "contextual"]
},
{
"q": "What is the role of the ‘lab’ sessions in Week 1?",
"answer": "Lab introduces hands-on practice: tokenization with NLTK, POS tagging, embedding visualization, basic text generation.",
"must": ["tokenization", "POS tagging", "embedding visualization"]
},
{
"q": "What is a challenge of rule-based NLP systems?",
"answer": "They have poor scalability, brittle rules, difficulty in handling ambiguity and unseen phenomena.",
"must": ["scalability", "ambiguity", "unseen"]
},
{
"q": "Why is generalization more difficult in language than in simpler domains?",
"answer": "Because language has high variability, compositionality, ambiguity, and open vocabulary.",
"must": ["variability", "ambiguity", "compositionality"]
},
{
"q": "What is the difference between input representation and model architecture in NLP?",
"answer": "Input representation handles how text is encoded (tokens, embeddings), whereas architecture is the model structure (e.g. transformer).",
"must": ["representation", "architecture", "token"]
},
{
"q": "Explain the concept of distributional hypothesis in NLP.",
"answer": "Words that appear in similar contexts have similar meanings—this underlies embedding techniques.",
"must": ["distributional hypothesis", "context", "meaning"]
},
{
"q": "How did corpus availability affect NLP research evolution?",
"answer": "More data (corpora) enabled statistical methods and neural models; lack of data earlier limited performance.",
"must": ["corpus", "data", "statistical"]
},
{
"q": "What is the general purpose of evaluation in NLP?",
"answer": "To measure model performance, compare methods, diagnose errors, and guide improvements.",
"must": ["evaluation", "performance", "diagnose"]
},
{
"q": "Why do we integrate application stories in the introductory lecture?",
"answer": "To motivate students, illustrate real-world impact, and provide context for theory.",
"must": ["application", "motivation", "real-world"]
},
{
"q": "What is a limitation of purely data-driven methods in NLP?",
"answer": "They may lack interpretability, suffer from bias, require large data, and struggle with rare phenomena.",
"must": ["interpretability", "bias", "rare"]
},
{
"q": "How do LLMs relate to the content of Week 1 Session 2?",
"answer": "Session 2 introduces the revolution brought by LLMs, paradigm shift, zero/few-shot learning, modern architectures.",
"must": ["LLM", "paradigm shift", "zero-shot"]
},
{
"q": "Define tokenization and explain its significance in NLP preprocessing.",
"answer": "Tokenization splits raw text into units (words, subwords) which serve as inputs to downstream models.",
"must": ["tokenization", "preprocessing", "subword"]
},
{
"q": "What is normalization in text preprocessing? Give examples.",
"answer": "Normalization standardizes text: lowercasing, unicode normalization, punctuation removal, number normalization.",
"must": ["normalization", "lowercasing", "unicode"]
},
{
"q": "Compare stemming and lemmatization.",
"answer": "Stemming applies heuristic truncation (may produce nonwords); lemmatization uses morphological analysis to produce dictionary form.",
"must": ["stemming", "lemmatization", "morphological"]
},
{
"q": "Why remove stop words? What is a disadvantage?",
"answer": "Removing stop words reduces noise and model size, but may lose function words essential to meaning in some contexts.",
"must": ["stop words", "noise", "meaning"]
},
{
"q": "What issues arise when tokenizing Korean or Chinese?",
"answer": "They lack explicit whitespace segmentation, have complex morphology, so tokenization requires special methods (morpheme, BPE).",
"must": ["Korean", "segmentation", "morphology"]
},
{
"q": "What is Byte Pair Encoding (BPE)?",
"answer": "BPE is a subword tokenization method merging frequent symbol pairs iteratively to balance vocabulary and coverage.",
"must": ["BPE", "subword", "vocabulary"]
},
{
"q": "Explain WordPiece tokenization.",
"answer": "WordPiece builds subwords by selecting highest likelihood splits under a language model, commonly used in BERT.",
"must": ["WordPiece", "subword", "BERT"]
},
{
"q": "What problem do subword tokenizers solve compared to word-level tokenizers?",
"answer": "They reduce out-of-vocab issues while preserving meaningful units and controlling vocabulary size.",
"must": ["out-of-vocab", "vocabulary", "subword"]
},
{
"q": "How is Unicode normalization useful?",
"answer": "It ensures canonical forms (e.g. accents) are consistently represented (NFC, NFD), improving matching and cleaning.",
"must": ["Unicode", "normalization", "canonical"]
},
{
"q": "What is lowercasing and when is it harmful?",
"answer": "Lowercasing makes case-insensitive, reducing sparsity; but harmful if case carries information (e.g. named entities).",
"must": ["lowercasing", "case", "information"]
},
{
"q": "Why is punctuation handling nontrivial in preprocessing?",
"answer": "Because punctuation may carry semantic cues (e.g. “?”) or part of tokens (e.g. “U.S.A.”), so naive removal can harm meaning.",
"must": ["punctuation", "semantic", "token"]
},
{
"q": "What is the role of a vocabulary in tokenization?",
"answer": "Vocabulary maps tokens/subwords to indices; its size and coverage influence model capacity and OOV handling.",
"must": ["vocabulary", "tokens", "OOV"]
},
{
"q": "How do you handle digits and numbers during preprocessing?",
"answer": "You may normalize (e.g. replace with <NUM>), separate digits, or keep them if domain-specific.",
"must": ["digits", "normalize", "domain"]
},
{
"q": "What is text cleaning? Examples of noisy tokens?",
"answer": "Cleaning removes unwanted artifacts: HTML tags, URLs, emojis, extra whitespace, control characters.",
"must": ["cleaning", "URLs", "emojis"]
},
{
"q": "When might you not remove stop words?",
"answer": "In tasks like machine translation or reading comprehension where function words matter.",
"must": ["translation", "function words", "comprehension"]
},
{
"q": "What is tokenization ambiguity? Example.",
"answer": "Ambiguity arises when segmentation is unclear (e.g. “therapist” vs “the rapist”), requiring context awareness.",
"must": ["ambiguity", "segmentation", "context"]
},
{
"q": "What is a lexicon and how is it used in preprocessing?",
"answer": "A lexicon is a dictionary of words and morphological forms; used for normalization, lemmatization, POS dictionaries.",
"must": ["lexicon", "dictionary", "lemmatization"]
},
{
"q": "Why is reproducibility important in preprocessing pipelines?",
"answer": "So results are consistent across runs; preprocessing decisions (tokenization, normalization) must be deterministic.",
"must": ["reproducibility", "deterministic", "consistency"]
},
{
"q": "How do you evaluate or debug preprocessing quality?",
"answer": "By sampling tokenization outputs, comparing to gold standard, checking error cases and rare tokens.",
"must": ["evaluation", "debug", "error cases"]
},
{
"q": "What is a language model (LM)?",
"answer": "A language model assigns probabilities to sequences of tokens or predicts next tokens in text.",
"must": ["language model", "probabilities", "sequence"]
},
{
"q": "Explain an n-gram model.",
"answer": "An n-gram model approximates probability of a token given (n-1) preceding tokens, using frequency counts.",
"must": ["n-gram", "probability", "counts"]
},
{
"q": "What is the Markov assumption in n-gram modeling?",
"answer": "That the probability of a token depends only on a limited history of length (n-1), not full context.",
"must": ["Markov assumption", "history", "context"]
},
{
"q": "Define perplexity and how it's computed.",
"answer": "Perplexity = exp(−(1/N) * log-likelihood); lower means the model predicts better.",
"must": ["perplexity", "log-likelihood", "evaluation"]
},
{
"q": "Why smoothing is needed in n-gram models? Name one method.",
"answer": "To assign nonzero probabilities to unseen n-grams; e.g. Laplace (add-one), Kneser-Ney smoothing.",
"must": ["smoothing", "unseen", "Kneser-Ney"]
},
{
"q": "What is backoff and interpolation in smoothing?",
"answer": "Backoff uses lower-order model when higher-order is unseen; interpolation combines multiple orders weighted.",
"must": ["backoff", "interpolation", "lower-order"]
},
{
"q": "What are the main limitations of n-gram models?",
"answer": "Sparsity, limited context, poor generalization, large parameter space for high n.",
"must": ["sparsity", "limited context", "generalization"]
},
{
"q": "Why are language models useful in downstream tasks?",
"answer": "They provide prior probabilities, help in scoring candidate outputs, or as components in sequence models.",
"must": ["prior", "scoring", "downstream"]
},
{
"q": "How do you estimate probabilities from counts in n-gram models?",
"answer": "Use maximum likelihood estimation: (P(w_n | w_{n-(n-1)}) = \\frac{\\text{count}(w_{n-(n-1)},w_n)}{\\text{count}(w_{n-(n-1)})}\\).",
"must": ["maximum likelihood", "counts", "conditional"]
},
{
"q": "What is the curse of dimensionality in language modeling?",
"answer": "The number of possible n-grams grows exponentially, causing data sparsity and storage issues.",
"must": ["dimensionality", "exponential", "sparsity"]
},
{
"q": "What is a skip-gram and how does it differ from n-gram counting?",
"answer": "Skip-gram (in embeddings) predicts context words skipping intervening ones; not same as statistical n-gram counts.",
"must": ["skip-gram", "context", "prediction"]
},
{
"q": "When would perplexity mislead as a metric?",
"answer": "When models assign high probabilities to frequent tokens but perform poorly on rarer or downstream tasks.",
"must": ["mislead", "frequent tokens", "downstream"]
},
{
"q": "What is the relation between cross-entropy and perplexity?",
"answer": "Perplexity = 2^{cross-entropy} (if log base 2), so lower cross-entropy → lower perplexity.",
"must": ["cross-entropy", "perplexity", "relation"]
},
{
"q": "How to compare two language models using perplexity?",
"answer": "Compute perplexity on the same held-out set; lower perplexity indicates better predictions.",
"must": ["compare", "held-out", "lower"]
},
{
"q": "What is an advantage of statistical LMs vs rule-based for prediction?",
"answer": "They can estimate probabilities over alternatives and generalize from data instead of fixed rules.",
"must": ["probability", "generalize", "data"]
},
{
"q": "What role does smoothing play in generalization?",
"answer": "Smoothing distributes probability mass to unseen events, enabling better generalization to unseen cases.",
"must": ["smoothing", "distribution", "generalization"]
},
{
"q": "Why can neural language models outperform n-gram models?",
"answer": "Neural models embed context, share parameters, handle longer context, and generalize across vocabulary.",
"must": ["neural", "embed", "generalize"]
},
{
"q": "What is teacher forcing during LM training?",
"answer": "At training time, the model is fed the ground-truth previous token instead of its own prediction.",
"must": ["teacher forcing", "training", "ground-truth"]
},
{
"q": "What is a word embedding and why is it useful?",
"answer": "A dense vector representation capturing semantics, enabling similarity and compact encoding compared to sparse one-hot.",
"must": ["word embedding", "dense", "semantics"]
},
{
"q": "Describe the one-hot representation and its downsides.",
"answer": "One-hot is a sparse vector with one “1” per word; downsides are high dimensionality and no notion of similarity.",
"must": ["one-hot", "sparse", "dimensionality"]
},
{
"q": "How does Word2Vec’s skip-gram model work?",
"answer": "Given a target word, skip-gram predicts surrounding context words by maximizing softmax probabilities or using negative sampling.",
"must": ["skip-gram", "Word2Vec", "context"]
},
{
"q": "How does the Word2Vec CBOW variant work?",
"answer": "CBOW predicts the target word from surrounding context words by averaging context embeddings and using softmax.",
"must": ["CBOW", "context", "predict"]
},
{
"q": "What is negative sampling and why is it used?",
"answer": "It approximates full softmax by sampling negative (non-context) words, reducing computational cost.",
"must": ["negative sampling", "softmax", "approximation"]
},
{
"q": "Explain hierarchical softmax.",
"answer": "Hierarchical softmax replaces flat softmax with binary tree structure to reduce cost of probability computation for large vocabularies.",
"must": ["hierarchical softmax", "binary tree", "cost"]
},
{
"q": "What is the role of context window size?",
"answer": "Window size determines how many neighbors are considered; too small misses semantics, too large introduces noise.",
"must": ["window size", "context", "noise"]
},
{
"q": "What is the evaluation of embeddings (intrinsic vs extrinsic)?",
"answer": "Intrinsic: similarity / analogy tasks; extrinsic: performance gain in downstream tasks.",
"must": ["intrinsic", "extrinsic", "downstream"]
},
{
"q": "What is GloVe and how is it different from Word2Vec?",
"answer": "GloVe uses global co-occurrence counts and factorizes log co-occurrence matrix; Word2Vec is local context prediction.",
"must": ["GloVe", "co-occurrence", "prediction"]
},
{
"q": "What is FastText, and how does it handle OOV words?",
"answer": "FastText represents words as sum of subword (n-gram) embeddings, enabling representation for unseen words.",
"must": ["FastText", "subword", "OOV"]
},
{
"q": "Why do embeddings reflect semantic similarity?",
"answer": "Because they are trained to place contextually similar words close in vector space under geometric constraints.",
"must": ["semantic similarity", "vector space", "geometry"]
},
{
"q": "What is the analogy task (king – man + woman)? Why is it used?",
"answer": "Analogy tests vector arithmetic properties (king – man + woman = queen) to evaluate embedding semantics.",
"must": ["analogy", "vector arithmetic", "queen"]
},
{
"q": "What is cosine similarity? How used in embedding space?",
"answer": "Cosine similarity = dot(u, v) / (|u||v|), measures angular closeness; used to measure embedding similarity.",
"must": ["cosine similarity", "dot", "magnitude"]
},
{
"q": "What is the effect of normalizing embedding vectors?",
"answer": "Normalization ensures length invariance, so comparisons focus on direction/semantic, not magnitude.",
"must": ["normalization", "direction", "magnitude"]
},
{
"q": "What is the curse of hubness in high-dimensional embeddings?",
"answer": "Some points (hubs) appear overly often as nearest neighbors, biasing similarity results.",
"must": ["hubness", "nearest neighbor", "bias"]
},
{
"q": "What is embedding projection and dimensionality reduction (e.g. PCA, t-SNE)?",
"answer": "They reduce embedding dimensions (for visualization or efficiency) while preserving structure (variance or neighborhoods).",
"must": ["PCA", "t-SNE", "dimensionality"]
},
{
"q": "How can embedding drift happen over time?",
"answer": "As corpora evolve, retraining may shift embeddings such that old and new embeddings become incompatible.",
"must": ["drift", "retraining", "compatibility"]
},
{
"q": "Why integrate embeddings into downstream NLP tasks (e.g. classification)?",
"answer": "Because embeddings serve as rich features capturing semantic relations, improving model inputs.",
"must": ["features", "semantic relations", "downstream"]
},
{
"q": "What is the attention mechanism and why is it important?",
"answer": "Attention computes weighted interactions between elements in a sequence, enabling models to focus on relevant parts.",
"must": ["attention", "weighted", "focus"]
},
{
"q": "Define self-attention in the transformer context.",
"answer": "Self-attention lets each token attend to all others (or itself) in the same sequence via queries, keys, values.",
"must": ["self-attention", "queries", "values"]
},
{
"q": "What are queries, keys, and values in attention?",
"answer": "They are linear projections of input: query matches keys, weights values to compute attention output.",
"must": ["queries", "keys", "values"]
},
{
"q": "Explain scaled dot-product attention.",
"answer": "Attention = softmax((QKᵀ)/√d_k) V, where scaling by √d_k stabilizes gradients for large dimension.",
"must": ["scaled dot-product", "softmax", "scale"]
},
{
"q": "What is multi-head attention and its benefit?",
"answer": "Multiple parallel attention heads allow the model to capture different types of relationships in different subspaces.",
"must": ["multi-head", "parallel", "subspaces"]
},
{
"q": "Describe the architecture of a Transformer encoder layer.",
"answer": "Encoder = multi-head self-attention + residual + layer norm + feedforward + residual + layer norm.",
"must": ["encoder", "residual", "layer norm"]
},
{
"q": "What is positional encoding and why is it used?",
"answer": "Positional encoding injects token order information (sinusoids or learnable) because self-attention is order-invariant.",
"must": ["positional encoding", "order", "invariant"]
},
{
"q": "How does the Transformer decoder differ from encoder?",
"answer": "Decoder has masked self-attention, encoder-decoder attention, plus feedforward and residuals.",
"must": ["masked self-attention", "encoder-decoder", "decoder"]
},
{
"q": "What is masking in the decoder self-attention?",
"answer": "Mask prevents attending to future tokens, enforcing autoregressive prediction during training/inference.",
"must": ["masking", "future tokens", "autoregressive"]
},
{
"q": "What is the role of feedforward network inside transformer layers?",
"answer": "A two-layer MLP applied per token to transform representations nonlinearly, with residual connection.",
"must": ["feedforward", "MLP", "residual"]
},
{
"q": "Why residual connections and layer normalization are important?",
"answer": "Residuals ease gradient flow; layer norm stabilizes training and ensures normalized activations.",
"must": ["residual", "layer norm", "stability"]
},
{
"q": "What is BERT and how does it use transformer architecture?",
"answer": "BERT is a bidirectional transformer pretrained with masked LM and next sentence prediction objectives.",
"must": ["BERT", "bidirectional", "masked LM"]
},
{
"q": "What is the difference between BERT and GPT architectures?",
"answer": "BERT is encoder-only and bidirectional, GPT is decoder-only and autoregressive (unidirectional).",
"must": ["encoder-only", "decoder-only", "autoregressive"]
},
{
"q": "How do you visualize attention weights for interpretability?",
"answer": "You map attention matrices to heatmaps over token pairs to see which tokens attend to which.",
"must": ["visualize", "attention weights", "heatmap"]
},
{
"q": "What is the transformer’s capacity to model long-range dependencies?",
"answer": "Self-attention attends globally, enabling capturing distant dependencies better than RNNs.",
"must": ["long-range", "global attention", "dependencies"]
},
{
"q": "What is the computational complexity of self-attention in sequence length?",
"answer": "It is O(n²) in sequence length due to pairwise dot products, which may be costly for large n.",
"must": ["O(n^2)", "complexity", "sequence length"]
},
{
"q": "Name one efficient attention variant for long sequences.",
"answer": "Variants include Linformer, Performer, Longformer, Reformer, sparse attention mechanisms.",
"must": ["Longformer", "sparse attention", "efficient"]
},
{
"q": "How do you fine-tune a transformer for a downstream task?",
"answer": "Add a task-specific head (e.g. classification) on top of transformer output and train with task loss on labeled data.",
"must": ["fine-tune", "task-specific head", "loss"]
},
{
"q": "What is an LLM API and why is it useful?",
"answer": "LLM APIs let you access large pretrained models (like GPT) over network, avoiding heavy local compute.",
"must": ["LLM API", "pretrained model", "network"]
},
{
"q": "How do you call the OpenAI API to generate text?",
"answer": "You send a prompt, set parameters (model, max_tokens, temperature), and receive generated text.",
"must": ["prompt", "temperature", "max_tokens"]
},
{
"q": "What is temperature in sampling methods?",
"answer": "Temperature controls randomness: high temperature flattens distribution (more randomness), low sharpens peak.",
"must": ["temperature", "sampling", "distribution"]
},
{
"q": "What is top-k sampling?",
"answer": "Select among top k highest-probability tokens and sample only from them, excluding long tail.",
"must": ["top-k", "sampling", "probability"]
},
{
"q": "What is nucleus (top-p) sampling?",
"answer": "Choose smallest set of tokens whose cumulative probability ≥ p, sample from them.",
"must": ["top-p", "nucleus", "cumulative"]
},
{
"q": "What is greedy decoding?",
"answer": "Always pick the highest-probability token at each step; deterministic but may be suboptimal.",
"must": ["greedy", "highest probability", "deterministic"]
},
{
"q": "What is beam search and how does it work?",
"answer": "Beam search keeps top B candidate sequences at each step, exploring alternatives to maximize total score.",
"must": ["beam search", "candidates", "score"]
},
{
"q": "Why include randomness (sampling) in generation?",
"answer": "To increase diversity and avoid repetitive or overly deterministic output.",
"must": ["diversity", "randomness", "avoid repetition"]
},
{
"q": "How does token limit affect API responses?",
"answer": "Max tokens constrains output length; exceeding or misestimating may truncate responses.",
"must": ["token limit", "truncate", "length"]
},
{
"q": "What is stop sequence in generation APIs?",
"answer": "A sequence of characters which, if generated, signals the model to stop output early.",
"must": ["stop sequence", "terminate", "output"]
},
{
"q": "How do you handle over-long generation from LLM APIs?",
"answer": "You set max_tokens, use stop sequences, or prune output programmatically.",
"must": ["max_tokens", "prune", "stop sequence"]
},
{
"q": "Why monitor token usage and cost when using APIs?",
"answer": "Because billing is often per token; inefficient prompts or long responses increase cost.",
"must": ["token usage", "cost", "billing"]
},
{
"q": "What is prompt chaining or iterative prompting?",
"answer": "Breaking a task into subprompts and chaining outputs to solve complex tasks step by step.",
"must": ["prompt chaining", "subprompts", "iterative"]
},
{
"q": "What is context window and how does it limit LLMs?",
"answer": "The maximum token length model can condition on; longer inputs beyond this are truncated or dropped.",
"must": ["context window", "token length", "truncate"]
},
{
"q": "How can you mitigate context window overflow?",
"answer": "By chunking input, summarizing, or sliding window techniques.",
"must": ["chunking", "summarizing", "sliding window"]
},
{
"q": "What is a “prompt injection” risk?",
"answer": "Attackers may inject malicious instructions in user input that are executed by the LLM.",
"must": ["prompt injection", "attack", "instructions"]
},
{
"q": "How do you evaluate generated text quality?",
"answer": "Using human evaluation, BLEU, ROUGE, coherence, relevance, and error analysis.",
"must": ["BLEU", "ROUGE", "coherence"]
},
{
"q": "What is temperature = 0 behavior in sampling?",
"answer": "Equivalent to greedy decoding: always choose highest-probability token.",
"must": ["temperature 0", "greedy", "deterministic"]
},
{
"q": "What is the focus of the Nobel Physics special lecture in this course?",
"answer": "Exploring foundational discoveries in neural networks and deep learning evolution.",
"must": ["neural networks", "deep learning", "foundational"]
},
{
"q": "How do advances in physics relate to AI model development historically?",
"answer": "Physics advances in optimization, statistics, and signal processing influenced learning algorithms and architectures.",
"must": ["optimization", "statistics", "algorithms"]
},
{
"q": "What is the connection between energy minimization (physics) and loss minimization in ML?",
"answer": "Training neural networks often corresponds to minimizing an energy or potential function analogous to physics systems.",
"must": ["energy minimization", "loss function", "analogy"]
},
{
"q": "What is the theme of the Nobel Chemistry special lecture?",
"answer": "Computational protein design, structure prediction using AI, and the intersection of chemistry and ML.",
"must": ["protein design", "structure prediction", "AI"]
},
{
"q": "How does protein folding prediction benefit from ML techniques?",
"answer": "Models like AlphaFold use deep learning to predict 3D structure from sequence, leveraging patterns in known structures.",
"must": ["AlphaFold", "sequence", "structure"]
},
{
"q": "Name a key challenge in computational chemistry that AI helps address.",
"answer": "Large combinatorial search space of molecules, expensive physics-based simulation, and low-data regimes.",
"must": ["combinatorial", "simulation", "low-data"]
},
{
"q": "Why are special lectures included in an NLP/AI curriculum?",
"answer": "To expose students to interdisciplinary impact of AI, inspire breadth, and connect theory to frontier areas.",
"must": ["interdisciplinary", "impact", "frontier"]
},
{
"q": "What is the benefit of linking AI with other sciences (e.g. chemistry, physics)?",
"answer": "It fosters cross-domain innovation, applies methods across fields, and yields richer research directions.",
"must": ["cross-domain", "innovation", "research"]
},
{
"q": "How might advances in neural architectures from physics insights inform future NLP models?",
"answer": "Physics-inspired architectures (e.g. energy-based models) or optimization techniques may improve model design.",
"must": ["energy-based", "architectures", "optimization"]
},
{
"q": "What ethical or societal factor arises when AI meets scientific domains?",
"answer": "Issues include reproducibility, interpretability, domain bias, and misuse of results in high-stakes areas.",
"must": ["reproducibility", "interpretability", "bias"]
},
{
"q": "Give an example of AI-driven discovery in chemistry.",
"answer": "Designing novel molecules, drug discovery, enzyme engineering, materials with desired properties.",
"must": ["molecules", "drug discovery", "enzyme"]
},
{
"q": "Describe how ML models can approximate expensive physics simulations.",
"answer": "Using surrogate models, neural approximators that learn mapping from inputs to simulation outputs, reducing cost.",
"must": ["surrogate models", "approximation", "simulation"]
},
{
"q": "What is transfer learning and how might it apply across domains (e.g. NLP to chemistry)?",
"answer": "Transfer learning reuses pretrained features from one domain to another, aiding low-data tasks.",
"must": ["transfer learning", "pretrained", "low-data"]
},
{
"q": "Why is model interpretability especially critical in scientific domains?",
"answer": "Because decisions (e.g. drug design) require trust, explanation, and verification beyond black-box predictions.",
"must": ["interpretability", "trust", "verification"]
},
{
"q": "How do generative models (e.g. GANs, diffusion) relate to scientific applications?",
"answer": "They generate novel samples (e.g. molecules, materials) under constraints learned from data.",
"must": ["generative models", "GAN", "diffusion"]
},
{
"q": "What is the frontier challenge when combining AI models with physical laws?",
"answer": "Incorporating domain constraints, preserving physical consistency, and ensuring generalization beyond training data.",
"must": ["constraints", "consistency", "generalization"]
},
{
"q": "What role do these special lectures serve in project ideation?",
"answer": "They spark interdisciplinary project ideas and broaden students’ perspectives beyond pure NLP.",
"must": ["project", "interdisciplinary", "perspective"]
},
{
"q": "What is prompt engineering?",
"answer": "The practice of designing prompts to guide LLMs to produce desired outputs, influencing behavior and correctness.",
"must": ["prompt engineering", "design", "desired outputs"]
},
{
"q": "Define zero-shot prompting and few-shot prompting.",
"answer": "Zero-shot gives no examples and relies on model’s pretrained ability; few-shot includes example pairs to steer output.",
"must": ["zero-shot", "few-shot", "examples"]
},
{
"q": "What is chain-of-thought prompting?",
"answer": "Asking the model to explain reasoning steps explicitly in the prompt to improve logical task performance.",
"must": ["chain-of-thought", "reasoning", "prompt"]
},
{
"q": "Why is prompt phrasing sensitive? Give an example.",
"answer": "Small changes may lead to drastically different behavior; e.g. “List reasons why X” vs “Explain X”.",
"must": ["phrasing", "sensitive", "behavior"]
},
{
"q": "What is prompt injection and how to guard against it?",
"answer": "Adversarial text input trying to override instructions; guard via input sanitization and robust prompt templates.",
"must": ["prompt injection", "adversarial", "sanitize"]
},
{
"q": "What is the verbalizer in prompt templates (e.g. in prompt tuning)?",
"answer": "It maps model output tokens back to task labels (e.g. “positive” → label 1).",
"must": ["verbalizer", "mapping", "labels"]
},
{
"q": "What is prompt tuning / soft prompt?",
"answer": "Learned continuous prompt embeddings prepended to input, rather than discrete text prompts.",
"must": ["prompt tuning", "continuous", "embedding"]
},
{
"q": "What is instruction tuning?",
"answer": "Training the model with diverse natural language instructions so it generalizes to new tasks.",
"must": ["instruction tuning", "diverse", "generalize"]
},
{
"q": "What is the role of demonstrations in prompting?",
"answer": "Demonstrations (examples) help the model see the desired input-output mapping and reduce ambiguity.",
"must": ["demonstrations", "mapping", "ambiguity"]
},
{
"q": "How does few-shot prompting reduce hallucination risk?",
"answer": "By providing examples, it constrains output patterns, reducing freedom to hallucinate unrelated content.",
"must": ["hallucination", "examples", "constrain"]
},
{
"q": "What is iterative refinement in prompts?",
"answer": "Using multiple prompt stages: ask, review, revise output to improve correctness.",
"must": ["iterative", "refinement", "review"]
},
{
"q": "What is self-consistency prompting?",
"answer": "Generate multiple reasoning paths and aggregate consensus to improve accuracy.",
"must": ["self-consistency", "multiple paths", "consensus"]
},
{
"q": "What is a prompt template? Example elements?",
"answer": "A blueprint with placeholders (e.g. {input}, {instruction}, {examples}); ensures structure across prompts.",
"must": ["template", "placeholder", "structure"]
},
{
"q": "Why is prompt evaluation (A/B testing) important?",
"answer": "To compare prompt effectiveness, measure output quality, and choose best prompt variant.",
"must": ["evaluation", "A/B", "quality"]
},
{
"q": "What is meta-prompting?",
"answer": "Asking the model to generate or refine prompts itself as part of the pipeline.",
"must": ["meta-prompting", "generate", "refine"]
},
{
"q": "What is the challenge of prompt generalization across tasks?",
"answer": "A prompt performing well on one task may fail on another; general templates are hard to design.",
"must": ["generalization", "task", "templates"]
},
{
"q": "How do you combine prompt engineering with fine-tuning?",
"answer": "You can fine-tune with instructions and then further improve via prompting to adapt behavior.",
"must": ["fine-tuning", "prompting", "behavior"]
},
{
"q": "What is zero-shot chain-of-thought prompting? Use case.",
"answer": "Asking “Let’s think step by step” even without examples to encourage reasoning in zero-shot mode.",
"must": ["zero-shot", "chain-of-thought", "step by step"]
},
{
"q": "What is an LLM-based Q&A system?",
"answer": "A system that uses a large language model to answer user queries, often retrieving context or documents first.",
"must": ["Q&A", "LLM", "retrieve"]
},
{
"q": "Why use a vector database in Q&A systems?",
"answer": "To store dense embeddings of documents and support fast similarity search (nearest neighbor).",
"must": ["vector database", "embeddings", "similarity"]
},
{
"q": "What is FAISS and how is it used?",
"answer": "FAISS is a library for efficient similarity search over large embedding collections. It enables fast vector search.",
"must": ["FAISS", "similarity", "vectors"]
},
{
"q": "What is retrieval-augmented generation (RAG)?",
"answer": "RAG retrieves relevant documents from a corpus and uses them to condition the generation process, combining retrieval and generation.",
"must": ["retrieval", "generation"]
},
{
"q": "What is a knowledge graph and what is a triple?",
"answer": "A knowledge graph is a graph of entities and relations; a triple is (subject, predicate, object).",
"must": ["knowledge graph", "triple"]
},
{
"q": "How do you integrate retrieval and LLM generation in a Q&A pipeline?",
"answer": "Retrieve top-k documents, rank them, then feed them (as context) plus user query into the generation model.",
"must": ["retrieve", "context", "generation"]
},
{
"q": "What is the role of embeddings in retrieval-based Q&A?",
"answer": "Convert text into vector space so semantically similar texts are close and can be retrieved by similarity search.",
"must": ["embeddings", "vector space", "semantic"]
},
{
"q": "What is dense retrieval vs sparse retrieval?",
"answer": "Dense uses embeddings and vector similarity; sparse uses term-based indexing (e.g. BM25).",
"must": ["dense retrieval", "sparse retrieval", "BM25"]
},
{
"q": "What is BM25 scoring?",
"answer": "A term-frequency / inverse-document-frequency based retrieval scoring algorithm for sparse retrieval.",
"must": ["BM25", "term frequency", "inverse document frequency"]
},
{
"q": "What is reranking in retrieval pipelines?",
"answer": "After initial retrieval, use stronger models (cross-encoders) to re-score and reorder results for accuracy.",
"must": ["reranking", "cross-encoder", "rescore"]
},
{
"q": "Why limit context length when sending to LLM for Q&A?",
"answer": "Because LLMs have finite context windows; too much text may exceed limit or dilute signal.",
"must": ["context length", "limit", "signal"]
},
{
"q": "What is hallucination in LLM-based Q&A?",
"answer": "When the model generates plausible but incorrect or unsupported information.",
"must": ["hallucination", "incorrect", "unsupported"]
},
{
"q": "How to reduce hallucination in RAG systems?",
"answer": "Use verified documents, answer filtering, attribution, or grounding mechanisms.",
"must": ["grounding", "filtering", "attribution"]
},
{
"q": "What is a system integration challenge in deployment of Q&A?",
"answer": "Latency, scaling embedding search, prompt packaging, context management, error handling.",
"must": ["latency", "scaling", "context"]
},
{
"q": "How to evaluate Q&A system performance?",
"answer": "Metrics: accuracy, F1, exact match, human judgment, response time, user satisfaction.",
"must": ["accuracy", "F1", "exact match"]
},
{
"q": "What is context window sliding or chunking for long documents?",
"answer": "Break documents into overlapping windows or chunks and retrieve best chunks as context.",
"must": ["chunking", "sliding window", "overlap"]
},
{
"q": "How do you handle conflicting retrieved evidence?",
"answer": "Use reranking, conflict resolution heuristics, or ask LLM to reconcile contradictions.",
"must": ["conflict", "reranking", "reconcile"]
},
{
"q": "What is multi-hop Q&A? Give an example.",
"answer": "Answer that requires chaining reasoning across multiple documents (e.g. “Which author wrote book X and when?”).",
"must": ["multi-hop", "chain", "reasoning"]
},
{
"q": "Why is caching helpful in Q&A systems?",
"answer": "To reuse prior retrieval/generation results, reduce latency, and save API cost.",
"must": ["caching", "latency", "cost"]
},
{
"q": "What is the cold-start issue in Q&A systems?",
"answer": "When new topics lack indexed documents or embeddings, making retrieval ineffective initially.",
"must": ["cold-start", "new topics", "ineffective"]
},
{
"q": "What is Flask and how is it used in web development for NLP apps?",
"answer": "Flask is a lightweight Python web framework; in NLP apps it's used to create endpoints (APIs) that call LLM or model backends.",
"must": ["Flask", "Python", "endpoint"]
},
{
"q": "What is Streamlit and when is it preferred?",
"answer": "Streamlit is a framework for quickly building data apps with minimal code, useful for dashboards and prototyping.",
"must": ["Streamlit", "dashboard", "prototyping"]
},
{
"q": "Explain client-server architecture in a web app.",
"answer": "Client (browser) sends requests to server; server processes, possibly invoking ML models, returns responses.",
"must": ["client", "server", "requests"]
},
{
"q": "How do you integrate LLM API calls into a Flask route?",
"answer": "Within a route handler, accept user input, call LLM API (with prompt), and return response via JSON/html.",
"must": ["route", "LLM API", "response"]
},
{
"q": "What is CORS and why does it matter in web apps?",
"answer": "Cross-Origin Resource Sharing controls which domains can call your API; misconfigured CORS may block access.",
"must": ["CORS", "domains", "access"]
},
{
"q": "What is deployment (production) vs development mode?",
"answer": "Production uses secure, scalable config (e.g. Gunicorn, HTTPS), while development is simpler and local.",
"must": ["production", "development", "secure"]
},
{
"q": "What is containerization (Docker) in deployment?",
"answer": "Encapsulating app + dependencies in container for portability, reproducibility, and isolation.",
"must": ["Docker", "containerization", "portability"]
},
{
"q": "How do you secure API keys in web apps?",
"answer": "Use environment variables, secrets management, never embed keys in frontend or version control.",
"must": ["API keys", "environment variables", "secrets"]
},
{
"q": "What is input validation in web apps?",
"answer": "Sanitize user input to prevent injection attacks (SQL, prompt, HTML) and ensure correct formats.",
"must": ["validation", "sanitize", "injection"]
},
{
"q": "How to handle errors and exceptions in web APIs?",
"answer": "Use try/except, return HTTP error codes (e.g. 400, 500), log errors, and provide safe defaults.",
"must": ["error", "HTTP code", "logging"]
},
{
"q": "What is asynchronous request handling (async) in web frameworks?",
"answer": "Using asynchronous handlers to process I/O-bound tasks (like API calls) without blocking server threads.",
"must": ["asynchronous", "I/O", "non-blocking"]
},
{
"q": "Why use caching on the server side?",
"answer": "To store responses for repeated inputs, reduce API calls, and improve performance.",
"must": ["caching", "performance", "reduce"]
},
{
"q": "What are environment variables and config separation?",
"answer": "Store sensitive settings (e.g. API endpoints, keys) outside code, loaded at runtime via config files or env vars.",
"must": ["environment variables", "config", "sensitive"]
},
{
"q": "Explain how to deploy a Flask app to a cloud service (e.g. Heroku, AWS)",
"answer": "Push code, set env vars, configure web server (Gunicorn), setup domain/SSL, manage scaling.",
"must": ["deploy", "Heroku", "Gunicorn"]
},
{
"q": "How would you containerize and deploy an NLP web app with Docker + Kubernetes?",
"answer": "Write Dockerfile, build image, deploy to cluster, manage scaling, load balancing, config maps.",
"must": ["Kubernetes", "load balancing", "scaling"]
},
{
"q": "What is rate limiting and why is it useful for APIs?",
"answer": "Throttle request rates to protect backend from overload or abuse (e.g. too many LLM calls).",
"must": ["rate limiting", "throttle", "protect"]
},
{
"q": "What is logging and monitoring in deployed web apps?",
"answer": "Track requests, errors, latency; use tools (e.g. Prometheus, Grafana) to observe system health.",
"must": ["logging", "monitoring", "latency"]
},
{
"q": "What is reverse proxy (e.g. Nginx) and how is it used?",
"answer": "Proxy server in front of app server to handle SSL, load balancing, URL routing, static files.",
"must": ["reverse proxy", "Nginx", "routing"]
},
{
"q": "What is output structuring in LLM systems?",
"answer": "Designing prompts or constraints so model outputs follow a desired structured format (JSON, XML, template).",
"must": ["structuring", "format", "template"]
},
{
"q": "What is a template-based output? Give an example.",
"answer": "Predefined skeleton with placeholders, e.g. “{ “title”: __, “summary”: __ }”, forcing consistent layout.",
"must": ["template", "placeholder", "layout"]
},
{
"q": "How do JSON and XML formatting help in structured outputs?",
"answer": "They provide machine-readable, schema-based formats that allow downstream parsing and validation.",
"must": ["JSON", "XML", "schema"]
},
{
"q": "Why use markdown or other markup languages in LLM output?",
"answer": "They enable readable rich text (headings, lists, tables) while being parseable by applications.",
"must": ["markdown", "markup", "readable"]
},
{
"q": "What is controlling temperature and sampling in output control?",
"answer": "Adjusting temperature or top-k/top-p changes output randomness, helping enforce more predictable structure.",
"must": ["temperature", "sampling", "control"]
},
{
"q": "What are stop sequences and length constraints in output control?",
"answer": "Stop sequences specify termination tokens; length constraints (min/max) bound output size.",
"must": ["stop sequence", "length constraint", "termination"]
},
{
"q": "What strategies enforce output formats (e.g. JSON)?",
"answer": "Prompt templates, post-checking & correction, mask tokens, few-shot examples with format enforcement.",
"must": ["format enforcement", "post-checking", "mask"]
},
{
"q": "How do you validate structured output from LLMs?",
"answer": "Using JSON/XML schema validation, regex checks, fallback logic, or retries upon malformed output.",
"must": ["validation", "schema", "fallback"]
},
{
"q": "What error-handling techniques apply when output is malformed?",
"answer": "Retry generation, fallback to simpler prompt, error correction heuristics, or human fallback.",
"must": ["error-handling", "retry", "fallback"]
},
{
"q": "What is the trade-off between strict formatting and natural response flexibility?",
"answer": "Strict format reduces flexibility or fluency; flexible responses risk invalid structure or inconsistency.",
"must": ["trade-off", "fluency", "structure"]
},
{
"q": "How to parse LLM output reliably in applications?",
"answer": "Use robust parsers tolerant to minor deviations, validate, and fallback to repair routines.",
"must": ["parse", "robust", "repair"]
},
{
"q": "Why is format consistency important in multi-turn systems?",
"answer": "To maintain structured data across turns, avoid parser failures, and simplify downstream logic.",
"must": ["consistency", "multi-turn", "parser"]
},
{
"q": "What is a fallback mechanism for malformed outputs?",
"answer": "If parsing fails, revert to plain text, ask model to reformat, or return default value.",
"must": ["fallback", "parsing fails", "default"]
},
{
"q": "What does advanced output control cover beyond fundamentals?",
"answer": "Techniques like dynamic templates, in-prompt validators, constrained decoding, and self-checking loops.",
"must": ["constrained decoding", "self-checking", "dynamic template"]
},
{
"q": "How do you build a system for structured output in a project?",
"answer": "Design templates, integrate validation modules, handle errors, and support multiple formats (JSON, XML).",
"must": ["system", "validation module", "multiple formats"]
},
{
"q": "What is the role of format enforcement in real-world applications?",
"answer": "Ensures integrations (APIs, downstream modules) can reliably parse and consume outputs.",
"must": ["format enforcement", "integration", "parse"]
},
{
"q": "How can you combine prompting and programmatic postprocessing?",
"answer": "Prompt to encourage format, then parse output and correct or sanitize via code where needed.",
"must": ["postprocessing", "sanitize", "prompt"]
},
{
"q": "What is the assignment objective for Week 12?",
"answer": "Implement multiple output structuring methods, validate formats, handle errors, and build parsers.",
"must": ["output structuring", "validation", "parsers"]
},
{
"q": "Give an example of enforcing stop sequences to prevent extraneous text.",
"answer": "Include “<END>” marker in template and set it as stop sequence so model halts output beyond it.",
"must": ["stop sequence", "marker", "halt"]
},
{
"q": "What is format drift and how to guard against it?",
"answer": "When model format changes over time; guard via validation, prompting examples, and checks on outputs.",
"must": ["format drift", "validation", "checks"]
},
{
"q": "How do you design fallback strategies for structuring failures?",
"answer": "Combine retries, simpler prompts, fallback parsers, or human review when structure fails repeatedly.",
"must": ["fallback", "retries", "review"]
}
]
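
A minimal usage sketch in Python, assuming the array above is saved locally as ICT3113-OPT-RAG-EVAL.json and that a generated answer is judged by simple keyword overlap against its "must" list (both the file path and the scoring rule are assumptions for illustration, not part of the gist):

# Load the eval set and score answers by checking their "must" keywords.
import json

def keyword_score(candidate, must):
    """Fraction of required keywords found in the candidate (case-insensitive)."""
    text = candidate.lower()
    hits = sum(1 for kw in must if kw.lower() in text)
    return hits / len(must) if must else 1.0

with open("ICT3113-OPT-RAG-EVAL.json", encoding="utf-8") as f:
    eval_set = json.load(f)

for item in eval_set:
    # Stand-in: score the reference answer against its own keywords;
    # replace item["answer"] with your RAG system's generated answer.
    score = keyword_score(item["answer"], item["must"])
    print(f"{score:.2f}  {item['q']}")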