Last active
October 8, 2025 12:19
-
-
Save junhua/1bd5134519938f2f8c78c3a57ca79b43 to your computer and use it in GitHub Desktop.
ICT3113-OPT-RAG-EVAL
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| [ | |
| { | |
| "q": "What is NLP and what are its main goals?", | |
| "answer": "NLP (Natural Language Processing) enables machines to understand, interpret, and generate human language, for tasks like translation, summarization, and sentiment analysis.", | |
| "must": ["NLP", "human language", "understand"] | |
| }, | |
| { | |
| "q": "Describe the traditional NLP pipeline stages.", | |
| "answer": "Stages include text preprocessing (tokenization, cleaning), feature extraction, modeling, evaluation and postprocessing.", | |
| "must": ["preprocessing", "feature extraction", "modeling"] | |
| }, | |
| { | |
| "q": "How did rule-based approaches dominate early NLP methods?", | |
| "answer": "They relied on handcrafted linguistic rules, grammars, and lexicons, often brittle and labor-intensive.", | |
| "must": ["rule-based", "lexicon", "grammar"] | |
| }, | |
| { | |
| "q": "Why did statistical and machine learning methods become dominant in NLP?", | |
| "answer": "They enable models to learn from data, handle large corpora, and generalize better than rigid rules.", | |
| "must": ["statistical", "learning", "generalize"] | |
| }, | |
| { | |
| "q": "What is the paradigm shift introduced by modern LLMs?", | |
| "answer": "From task-specific models to general-purpose LLMs that can adapt via prompts/fine-tuning across many tasks.", | |
| "must": ["task-specific", "general-purpose", "prompt"] | |
| }, | |
| { | |
| "q": "Define zero-shot learning in the context of NLP.", | |
| "answer": "Zero-shot is when a model performs a task it was not explicitly trained on, using only its pretrained knowledge and prompt instruction.", | |
| "must": ["zero-shot", "pretrained", "prompt"] | |
| }, | |
| { | |
| "q": "What is few-shot prompting and how is it used?", | |
| "answer": "Few-shot prompting involves giving a few examples in the prompt to guide the model toward the desired behavior on a new task.", | |
| "must": ["few-shot", "prompt", "examples"] | |
| }, | |
| { | |
| "q": "List some application areas of NLP today.", | |
| "answer": "Applications include machine translation, question answering, summarization, sentiment analysis, chatbots, information extraction.", | |
| "must": ["machine translation", "question answering", "summarization"] | |
| }, | |
| { | |
| "q": "What are named entity recognition (NER) and part-of-speech (POS) tagging?", | |
| "answer": "NER identifies and classifies named entities (persons, locations); POS tagging labels words with grammatical categories (noun, verb, etc.).", | |
| "must": ["named entity recognition", "part-of-speech", "classification"] | |
| }, | |
| { | |
| "q": "How does the rise of deep learning change NLP capabilities?", | |
| "answer": "Deep learning allows end-to-end models, capturing nonlinear semantic patterns and contextual embeddings, improving many tasks.", | |
| "must": ["deep learning", "embeddings", "contextual"] | |
| }, | |
| { | |
| "q": "What is the role of the ‘lab’ sessions in Week 1?", | |
| "answer": "Lab introduces hands-on practice: tokenization with NLTK, POS tagging, embedding visualization, basic text generation.", | |
| "must": ["tokenization", "POS tagging", "embedding visualization"] | |
| }, | |
| { | |
| "q": "What is a challenge of rule-based NLP systems?", | |
| "answer": "They have poor scalability, brittle rules, difficulty in handling ambiguity and unseen phenomena.", | |
| "must": ["scalability", "ambiguity", "unseen"] | |
| }, | |
| { | |
| "q": "Why is generalization more difficult in language than in simpler domains?", | |
| "answer": "Because language has high variability, compositionality, ambiguity, and open vocabulary.", | |
| "must": ["variability", "ambiguity", "compositionality"] | |
| }, | |
| { | |
| "q": "What is the difference between input representation and model architecture in NLP?", | |
| "answer": "Input representation handles how text is encoded (tokens, embeddings), whereas architecture is the model structure (e.g. transformer).", | |
| "must": ["representation", "architecture", "token"] | |
| }, | |
| { | |
| "q": "Explain the concept of distributional hypothesis in NLP.", | |
| "answer": "Words that appear in similar contexts have similar meanings—this underlies embedding techniques.", | |
| "must": ["distributional hypothesis", "context", "meaning"] | |
| }, | |
| { | |
| "q": "How did corpus availability affect NLP research evolution?", | |
| "answer": "More data (corpora) enabled statistical methods and neural models; lack of data earlier limited performance.", | |
| "must": ["corpus", "data", "statistical"] | |
| }, | |
| { | |
| "q": "What is the general purpose of evaluation in NLP?", | |
| "answer": "To measure model performance, compare methods, diagnose errors, and guide improvements.", | |
| "must": ["evaluation", "performance", "diagnose"] | |
| }, | |
| { | |
| "q": "Why do we integrate application stories in the introductory lecture?", | |
| "answer": "To motivate students, illustrate real-world impact, and provide context for theory.", | |
| "must": ["application", "motivation", "real-world"] | |
| }, | |
| { | |
| "q": "What is a limitation of purely data-driven methods in NLP?", | |
| "answer": "They may lack interpretability, suffer from bias, require large data, and struggle with rare phenomena.", | |
| "must": ["interpretability", "bias", "rare"] | |
| }, | |
| { | |
| "q": "How do LLMs relate to the content of Week 1 Session 2?", | |
| "answer": "Session 2 introduces the revolution brought by LLMs, paradigm shift, zero/few-shot learning, modern architectures.", | |
| "must": ["LLM", "paradigm shift", "zero-shot"] | |
| }, | |
| { | |
| "q": "Define tokenization and explain its significance in NLP preprocessing.", | |
| "answer": "Tokenization splits raw text into units (words, subwords) which serve as inputs to downstream models.", | |
| "must": ["tokenization", "preprocessing", "subword"] | |
| }, | |
| { | |
| "q": "What is normalization in text preprocessing? Give examples.", | |
| "answer": "Normalization standardizes text: lowercasing, unicode normalization, punctuation removal, number normalization.", | |
| "must": ["normalization", "lowercasing", "unicode"] | |
| }, | |
| { | |
| "q": "Compare stemming and lemmatization.", | |
| "answer": "Stemming applies heuristic truncation (may produce nonwords); lemmatization uses morphological analysis to produce dictionary form.", | |
| "must": ["stemming", "lemmatization", "morphological"] | |
| }, | |
| { | |
| "q": "Why remove stop words? What is a disadvantage?", | |
| "answer": "Removing stop words reduces noise and model size, but may lose function words essential to meaning in some contexts.", | |
| "must": ["stop words", "noise", "meaning"] | |
| }, | |
| { | |
| "q": "What issues arise when tokenizing Korean or Chinese?", | |
| "answer": "Chinese lacks explicit whitespace between words, and Korean is agglutinative with complex morphology, so tokenization requires special segmentation methods (morpheme analysis, BPE).", | |
| "must": ["Korean", "segmentation", "morphology"] | |
| }, | |
| { | |
| "q": "What is Byte Pair Encoding (BPE)?", | |
| "answer": "BPE is a subword tokenization method merging frequent symbol pairs iteratively to balance vocabulary and coverage.", | |
| "must": ["BPE", "subword", "vocabulary"] | |
| }, | |
| { | |
| "q": "Explain WordPiece tokenization.", | |
| "answer": "WordPiece builds subwords by selecting highest likelihood splits under a language model, commonly used in BERT.", | |
| "must": ["WordPiece", "subword", "BERT"] | |
| }, | |
| { | |
| "q": "What problem do subword tokenizers solve compared to word-level tokenizers?", | |
| "answer": "They reduce out-of-vocab issues while preserving meaningful units and controlling vocabulary size.", | |
| "must": ["out-of-vocab", "vocabulary", "subword"] | |
| }, | |
| { | |
| "q": "How is Unicode normalization useful?", | |
| "answer": "It ensures canonical forms (e.g. accents) are consistently represented (NFC, NFD), improving matching and cleaning.", | |
| "must": ["Unicode", "normalization", "canonical"] | |
| }, | |
| { | |
| "q": "What is lowercasing and when is it harmful?", | |
| "answer": "Lowercasing makes text case-insensitive, reducing sparsity, but it is harmful if case carries information (e.g. named entities).", | |
| "must": ["lowercasing", "case", "information"] | |
| }, | |
| { | |
| "q": "Why is punctuation handling nontrivial in preprocessing?", | |
| "answer": "Because punctuation may carry semantic cues (e.g. “?”) or part of tokens (e.g. “U.S.A.”), so naive removal can harm meaning.", | |
| "must": ["punctuation", "semantic", "token"] | |
| }, | |
| { | |
| "q": "What is the role of a vocabulary in tokenization?", | |
| "answer": "Vocabulary maps tokens/subwords to indices; its size and coverage influence model capacity and OOV handling.", | |
| "must": ["vocabulary", "tokens", "OOV"] | |
| }, | |
| { | |
| "q": "How do you handle digits and numbers during preprocessing?", | |
| "answer": "You may normalize (e.g. replace with <NUM>), separate digits, or keep them if domain-specific.", | |
| "must": ["digits", "normalize", "domain"] | |
| }, | |
| { | |
| "q": "What is text cleaning? Examples of noisy tokens?", | |
| "answer": "Cleaning removes unwanted artifacts: HTML tags, URLs, emojis, extra whitespace, control characters.", | |
| "must": ["cleaning", "URLs", "emojis"] | |
| }, | |
| { | |
| "q": "When might you not remove stop words?", | |
| "answer": "In tasks like machine translation or reading comprehension where function words matter.", | |
| "must": ["translation", "function words", "comprehension"] | |
| }, | |
| { | |
| "q": "What is tokenization ambiguity? Example.", | |
| "answer": "Ambiguity arises when segmentation is unclear (e.g. “therapist” vs “the rapist”), requiring context awareness.", | |
| "must": ["ambiguity", "segmentation", "context"] | |
| }, | |
| { | |
| "q": "What is a lexicon and how is it used in preprocessing?", | |
| "answer": "A lexicon is a dictionary of words and morphological forms; used for normalization, lemmatization, POS dictionaries.", | |
| "must": ["lexicon", "dictionary", "lemmatization"] | |
| }, | |
| { | |
| "q": "Why is reproducibility important in preprocessing pipelines?", | |
| "answer": "So results are consistent across runs; preprocessing decisions (tokenization, normalization) must be deterministic.", | |
| "must": ["reproducibility", "deterministic", "consistency"] | |
| }, | |
| { | |
| "q": "How do you evaluate or debug preprocessing quality?", | |
| "answer": "By sampling tokenization outputs, comparing to gold standard, checking error cases and rare tokens.", | |
| "must": ["evaluation", "debug", "error cases"] | |
| }, | |
| { | |
| "q": "What is a language model (LM)?", | |
| "answer": "A language model assigns probabilities to sequences of tokens or predicts next tokens in text.", | |
| "must": ["language model", "probabilities", "sequence"] | |
| }, | |
| { | |
| "q": "Explain an n-gram model.", | |
| "answer": "An n-gram model approximates probability of a token given (n-1) preceding tokens, using frequency counts.", | |
| "must": ["n-gram", "probability", "counts"] | |
| }, | |
| { | |
| "q": "What is the Markov assumption in n-gram modeling?", | |
| "answer": "That the probability of a token depends only on a limited history of length (n-1), not full context.", | |
| "must": ["Markov assumption", "history", "context"] | |
| }, | |
| { | |
| "q": "Define perplexity and how it's computed.", | |
| "answer": "Perplexity = exp(−(1/N) * log-likelihood); lower means the model predicts better.", | |
| "must": ["perplexity", "log-likelihood", "evaluation"] | |
| }, | |
| { | |
| "q": "Why is smoothing needed in n-gram models? Name one method.", | |
| "answer": "To assign nonzero probabilities to unseen n-grams; e.g. Laplace (add-one), Kneser-Ney smoothing.", | |
| "must": ["smoothing", "unseen", "Kneser-Ney"] | |
| }, | |
| { | |
| "q": "What is backoff and interpolation in smoothing?", | |
| "answer": "Backoff uses lower-order model when higher-order is unseen; interpolation combines multiple orders weighted.", | |
| "must": ["backoff", "interpolation", "lower-order"] | |
| }, | |
| { | |
| "q": "What are the main limitations of n-gram models?", | |
| "answer": "Sparsity, limited context, poor generalization, large parameter space for high n.", | |
| "must": ["sparsity", "limited context", "generalization"] | |
| }, | |
| { | |
| "q": "Why are language models useful in downstream tasks?", | |
| "answer": "They provide prior probabilities, help in scoring candidate outputs, or as components in sequence models.", | |
| "must": ["prior", "scoring", "downstream"] | |
| }, | |
| { | |
| "q": "How do you estimate probabilities from counts in n-gram models?", | |
| "answer": "Use maximum likelihood estimation: \\(P(w_i \\mid w_{i-n+1}^{i-1}) = \\frac{\\text{count}(w_{i-n+1}^{i-1}, w_i)}{\\text{count}(w_{i-n+1}^{i-1})}\\).", | |
| "must": ["maximum likelihood", "counts", "conditional"] | |
| }, | |
| { | |
| "q": "What is the curse of dimensionality in language modeling?", | |
| "answer": "The number of possible n-grams grows exponentially, causing data sparsity and storage issues.", | |
| "must": ["dimensionality", "exponential", "sparsity"] | |
| }, | |
| { | |
| "q": "What is a skip-gram and how does it differ from n-gram counting?", | |
| "answer": "Skip-gram (in embeddings) predicts context words skipping intervening ones; not same as statistical n-gram counts.", | |
| "must": ["skip-gram", "context", "prediction"] | |
| }, | |
| { | |
| "q": "When would perplexity mislead as a metric?", | |
| "answer": "When models assign high probabilities to frequent tokens but perform poorly on rarer or downstream tasks.", | |
| "must": ["mislead", "frequent tokens", "downstream"] | |
| }, | |
| { | |
| "q": "What is the relation between cross-entropy and perplexity?", | |
| "answer": "Perplexity = 2^{cross-entropy} (if log base 2), so lower cross-entropy → lower perplexity.", | |
| "must": ["cross-entropy", "perplexity", "relation"] | |
| }, | |
| { | |
| "q": "How to compare two language models using perplexity?", | |
| "answer": "Compute perplexity on the same held-out set; lower perplexity indicates better predictions.", | |
| "must": ["compare", "held-out", "lower"] | |
| }, | |
| { | |
| "q": "What is an advantage of statistical LMs vs rule-based for prediction?", | |
| "answer": "They can estimate probabilities over alternatives and generalize from data instead of fixed rules.", | |
| "must": ["probability", "generalize", "data"] | |
| }, | |
| { | |
| "q": "What role does smoothing play in generalization?", | |
| "answer": "Smoothing distributes probability mass to unseen events, enabling better generalization to unseen cases.", | |
| "must": ["smoothing", "distribution", "generalization"] | |
| }, | |
| { | |
| "q": "Why can neural language models outperform n-gram models?", | |
| "answer": "Neural models embed context, share parameters, handle longer context, and generalize across vocabulary.", | |
| "must": ["neural", "embed", "generalize"] | |
| }, | |
| { | |
| "q": "What is teacher forcing during LM training?", | |
| "answer": "At training time, the model is fed the ground-truth previous token instead of its own prediction.", | |
| "must": ["teacher forcing", "training", "ground-truth"] | |
| }, | |
| { | |
| "q": "What is a word embedding and why is it useful?", | |
| "answer": "A dense vector representation capturing semantics, enabling similarity and compact encoding compared to sparse one-hot.", | |
| "must": ["word embedding", "dense", "semantics"] | |
| }, | |
| { | |
| "q": "Describe the one-hot representation and its downsides.", | |
| "answer": "One-hot is a sparse vector with one “1” per word; downsides are high dimensionality and no notion of similarity.", | |
| "must": ["one-hot", "sparse", "dimensionality"] | |
| }, | |
| { | |
| "q": "How does Word2Vec’s skip-gram model work?", | |
| "answer": "Given a target word, skip-gram predicts surrounding context words by maximizing softmax probabilities or using negative sampling.", | |
| "must": ["skip-gram", "Word2Vec", "context"] | |
| }, | |
| { | |
| "q": "How does the Word2Vec CBOW variant work?", | |
| "answer": "CBOW predicts the target word from surrounding context words by averaging context embeddings and using softmax.", | |
| "must": ["CBOW", "context", "predict"] | |
| }, | |
| { | |
| "q": "What is negative sampling and why is it used?", | |
| "answer": "It approximates full softmax by sampling negative (non-context) words, reducing computational cost.", | |
| "must": ["negative sampling", "softmax", "approximation"] | |
| }, | |
| { | |
| "q": "Explain hierarchical softmax.", | |
| "answer": "Hierarchical softmax replaces flat softmax with binary tree structure to reduce cost of probability computation for large vocabularies.", | |
| "must": ["hierarchical softmax", "binary tree", "cost"] | |
| }, | |
| { | |
| "q": "What is the role of context window size?", | |
| "answer": "Window size determines how many neighbors are considered; too small misses semantics, too large introduces noise.", | |
| "must": ["window size", "context", "noise"] | |
| }, | |
| { | |
| "q": "How are embeddings evaluated (intrinsic vs extrinsic)?", | |
| "answer": "Intrinsic: similarity / analogy tasks; extrinsic: performance gain in downstream tasks.", | |
| "must": ["intrinsic", "extrinsic", "downstream"] | |
| }, | |
| { | |
| "q": "What is GloVe and how is it different from Word2Vec?", | |
| "answer": "GloVe uses global co-occurrence counts and factorizes log co-occurrence matrix; Word2Vec is local context prediction.", | |
| "must": ["GloVe", "co-occurrence", "prediction"] | |
| }, | |
| { | |
| "q": "What is FastText, and how does it handle OOV words?", | |
| "answer": "FastText represents words as sum of subword (n-gram) embeddings, enabling representation for unseen words.", | |
| "must": ["FastText", "subword", "OOV"] | |
| }, | |
| { | |
| "q": "Why do embeddings reflect semantic similarity?", | |
| "answer": "Because they are trained to place contextually similar words close in vector space under geometric constraints.", | |
| "must": ["semantic similarity", "vector space", "geometry"] | |
| }, | |
| { | |
| "q": "What is the analogy task (king – man + woman)? Why is it used?", | |
| "answer": "Analogy tests vector arithmetic properties (king – man + woman = queen) to evaluate embedding semantics.", | |
| "must": ["analogy", "vector arithmetic", "queen"] | |
| }, | |
| { | |
| "q": "What is cosine similarity? How used in embedding space?", | |
| "answer": "Cosine similarity = dot(u, v) / (|u||v|), measures angular closeness; used to measure embedding similarity.", | |
| "must": ["cosine similarity", "dot", "magnitude"] | |
| }, | |
| { | |
| "q": "What is the effect of normalizing embedding vectors?", | |
| "answer": "Normalization ensures length invariance, so comparisons focus on direction (semantics), not magnitude.", | |
| "must": ["normalization", "direction", "magnitude"] | |
| }, | |
| { | |
| "q": "What is the curse of hubness in high-dimensional embeddings?", | |
| "answer": "Some points (hubs) appear overly often as nearest neighbors, biasing similarity results.", | |
| "must": ["hubness", "nearest neighbor", "bias"] | |
| }, | |
| { | |
| "q": "What is embedding projection and dimensionality reduction (e.g. PCA, t-SNE)?", | |
| "answer": "They reduce embedding dimensions (for visualization or efficiency) while preserving structure (variance or neighborhoods).", | |
| "must": ["PCA", "t-SNE", "dimensionality"] | |
| }, | |
| { | |
| "q": "How can embedding drift happen over time?", | |
| "answer": "As corpora evolve, retraining may shift embeddings such that old and new embeddings become incompatible.", | |
| "must": ["drift", "retraining", "compatibility"] | |
| }, | |
| { | |
| "q": "Why integrate embeddings into downstream NLP tasks (e.g. classification)?", | |
| "answer": "Because embeddings serve as rich features capturing semantic relations, improving model inputs.", | |
| "must": ["features", "semantic relations", "downstream"] | |
| }, | |
| { | |
| "q": "What is the attention mechanism and why is it important?", | |
| "answer": "Attention computes weighted interactions between elements in a sequence, enabling models to focus on relevant parts.", | |
| "must": ["attention", "weighted", "focus"] | |
| }, | |
| { | |
| "q": "Define self-attention in the transformer context.", | |
| "answer": "Self-attention lets each token attend to all others (or itself) in the same sequence via queries, keys, values.", | |
| "must": ["self-attention", "queries", "values"] | |
| }, | |
| { | |
| "q": "What are queries, keys, and values in attention?", | |
| "answer": "They are linear projections of input: query matches keys, weights values to compute attention output.", | |
| "must": ["queries", "keys", "values"] | |
| }, | |
| { | |
| "q": "Explain scaled dot-product attention.", | |
| "answer": "Attention = softmax((QKᵀ)/√d_k) V, where scaling by √d_k stabilizes gradients for large dimension.", | |
| "must": ["scaled dot-product", "softmax", "scale"] | |
| }, | |
| { | |
| "q": "What is multi-head attention and its benefit?", | |
| "answer": "Multiple parallel attention heads allow the model to capture different types of relationships in different subspaces.", | |
| "must": ["multi-head", "parallel", "subspaces"] | |
| }, | |
| { | |
| "q": "Describe the architecture of a Transformer encoder layer.", | |
| "answer": "Encoder = multi-head self-attention + residual + layer norm + feedforward + residual + layer norm.", | |
| "must": ["encoder", "residual", "layer norm"] | |
| }, | |
| { | |
| "q": "What is positional encoding and why is it used?", | |
| "answer": "Positional encoding injects token order information (sinusoids or learnable) because self-attention is order-invariant.", | |
| "must": ["positional encoding", "order", "invariant"] | |
| }, | |
| { | |
| "q": "How does the Transformer decoder differ from encoder?", | |
| "answer": "Decoder has masked self-attention, encoder-decoder attention, plus feedforward and residuals.", | |
| "must": ["masked self-attention", "encoder-decoder", "decoder"] | |
| }, | |
| { | |
| "q": "What is masking in the decoder self-attention?", | |
| "answer": "Mask prevents attending to future tokens, enforcing autoregressive prediction during training/inference.", | |
| "must": ["masking", "future tokens", "autoregressive"] | |
| }, | |
| { | |
| "q": "What is the role of feedforward network inside transformer layers?", | |
| "answer": "A two-layer MLP applied per token to transform representations nonlinearly, with residual connection.", | |
| "must": ["feedforward", "MLP", "residual"] | |
| }, | |
| { | |
| "q": "Why are residual connections and layer normalization important?", | |
| "answer": "Residuals ease gradient flow; layer norm stabilizes training and ensures normalized activations.", | |
| "must": ["residual", "layer norm", "stability"] | |
| }, | |
| { | |
| "q": "What is BERT and how does it use transformer architecture?", | |
| "answer": "BERT is a bidirectional transformer pretrained with masked LM and next sentence prediction objectives.", | |
| "must": ["BERT", "bidirectional", "masked LM"] | |
| }, | |
| { | |
| "q": "What is the difference between BERT and GPT architectures?", | |
| "answer": "BERT is encoder-only and bidirectional, GPT is decoder-only and autoregressive (unidirectional).", | |
| "must": ["encoder-only", "decoder-only", "autoregressive"] | |
| }, | |
| { | |
| "q": "How do you visualize attention weights for interpretability?", | |
| "answer": "You map attention matrices to heatmaps over token pairs to see which tokens attend to which.", | |
| "must": ["visualize", "attention weights", "heatmap"] | |
| }, | |
| { | |
| "q": "What is the transformer’s capacity to model long-range dependencies?", | |
| "answer": "Self-attention attends globally, enabling it to capture distant dependencies better than RNNs.", | |
| "must": ["long-range", "global attention", "dependencies"] | |
| }, | |
| { | |
| "q": "What is the computational complexity of self-attention in sequence length?", | |
| "answer": "It is O(n²) in sequence length due to pairwise dot products, which may be costly for large n.", | |
| "must": ["O(n^2)", "complexity", "sequence length"] | |
| }, | |
| { | |
| "q": "Name one efficient attention variant for long sequences.", | |
| "answer": "Variants include Linformer, Performer, Longformer, Reformer, sparse attention mechanisms.", | |
| "must": ["Longformer", "sparse attention", "efficient"] | |
| }, | |
| { | |
| "q": "How do you fine-tune a transformer for a downstream task?", | |
| "answer": "Add a task-specific head (e.g. classification) on top of transformer output and train with task loss on labeled data.", | |
| "must": ["fine-tune", "task-specific head", "loss"] | |
| }, | |
| { | |
| "q": "What is an LLM API and why is it useful?", | |
| "answer": "LLM APIs let you access large pretrained models (like GPT) over network, avoiding heavy local compute.", | |
| "must": ["LLM API", "pretrained model", "network"] | |
| }, | |
| { | |
| "q": "How do you call the OpenAI API to generate text?", | |
| "answer": "You send a prompt, set parameters (model, max_tokens, temperature), and receive generated text.", | |
| "must": ["prompt", "temperature", "max_tokens"] | |
| }, | |
| { | |
| "q": "What is temperature in sampling methods?", | |
| "answer": "Temperature controls randomness: high temperature flattens distribution (more randomness), low sharpens peak.", | |
| "must": ["temperature", "sampling", "distribution"] | |
| }, | |
| { | |
| "q": "What is top-k sampling?", | |
| "answer": "Select among top k highest-probability tokens and sample only from them, excluding long tail.", | |
| "must": ["top-k", "sampling", "probability"] | |
| }, | |
| { | |
| "q": "What is nucleus (top-p) sampling?", | |
| "answer": "Choose smallest set of tokens whose cumulative probability ≥ p, sample from them.", | |
| "must": ["top-p", "nucleus", "cumulative"] | |
| }, | |
| { | |
| "q": "What is greedy decoding?", | |
| "answer": "Always pick the highest-probability token at each step; deterministic but may be suboptimal.", | |
| "must": ["greedy", "highest probability", "deterministic"] | |
| }, | |
| { | |
| "q": "What is beam search and how does it work?", | |
| "answer": "Beam search keeps top B candidate sequences at each step, exploring alternatives to maximize total score.", | |
| "must": ["beam search", "candidates", "score"] | |
| }, | |
| { | |
| "q": "Why include randomness (sampling) in generation?", | |
| "answer": "To increase diversity and avoid repetitive or overly deterministic output.", | |
| "must": ["diversity", "randomness", "avoid repetition"] | |
| }, | |
| { | |
| "q": "How does token limit affect API responses?", | |
| "answer": "Max tokens constrains output length; exceeding or misestimating may truncate responses.", | |
| "must": ["token limit", "truncate", "length"] | |
| }, | |
| { | |
| "q": "What is a stop sequence in generation APIs?", | |
| "answer": "A sequence of characters which, if generated, signals the model to stop output early.", | |
| "must": ["stop sequence", "terminate", "output"] | |
| }, | |
| { | |
| "q": "How do you handle over-long generation from LLM APIs?", | |
| "answer": "You set max_tokens, use stop sequences, or prune output programmatically.", | |
| "must": ["max_tokens", "prune", "stop sequence"] | |
| }, | |
| { | |
| "q": "Why monitor token usage and cost when using APIs?", | |
| "answer": "Because billing is often per token; inefficient prompts or long responses increase cost.", | |
| "must": ["token usage", "cost", "billing"] | |
| }, | |
| { | |
| "q": "What is prompt chaining or iterative prompting?", | |
| "answer": "Breaking a task into subprompts and chaining outputs to solve complex tasks step by step.", | |
| "must": ["prompt chaining", "subprompts", "iterative"] | |
| }, | |
| { | |
| "q": "What is context window and how does it limit LLMs?", | |
| "answer": "The maximum token length the model can condition on; inputs beyond this are truncated or dropped.", | |
| "must": ["context window", "token length", "truncate"] | |
| }, | |
| { | |
| "q": "How can you mitigate context window overflow?", | |
| "answer": "By chunking input, summarizing, or sliding window techniques.", | |
| "must": ["chunking", "summarizing", "sliding window"] | |
| }, | |
| { | |
| "q": "What is a “prompt injection” risk?", | |
| "answer": "Attackers may inject malicious instructions in user input that are executed by the LLM.", | |
| "must": ["prompt injection", "attack", "instructions"] | |
| }, | |
| { | |
| "q": "How do you evaluate generated text quality?", | |
| "answer": "Using human evaluation, BLEU, ROUGE, coherence, relevance, and error analysis.", | |
| "must": ["BLEU", "ROUGE", "coherence"] | |
| }, | |
| { | |
| "q": "What is temperature = 0 behavior in sampling?", | |
| "answer": "Equivalent to greedy decoding: always choose highest-probability token.", | |
| "must": ["temperature 0", "greedy", "deterministic"] | |
| }, | |
| { | |
| "q": "What is the focus of the Nobel Physics special lecture in this course?", | |
| "answer": "Exploring foundational discoveries in neural networks and deep learning evolution.", | |
| "must": ["neural networks", "deep learning", "foundational"] | |
| }, | |
| { | |
| "q": "How do advances in physics relate to AI model development historically?", | |
| "answer": "Physics advances in optimization, statistics, and signal processing influenced learning algorithms and architectures.", | |
| "must": ["optimization", "statistics", "algorithms"] | |
| }, | |
| { | |
| "q": "What is the connection between energy minimization (physics) and loss minimization in ML?", | |
| "answer": "Training neural networks often corresponds to minimizing an energy or potential function analogous to physics systems.", | |
| "must": ["energy minimization", "loss function", "analogy"] | |
| }, | |
| { | |
| "q": "What is the theme of the Nobel Chemistry special lecture?", | |
| "answer": "Computational protein design, structure prediction using AI, and the intersection of chemistry and ML.", | |
| "must": ["protein design", "structure prediction", "AI"] | |
| }, | |
| { | |
| "q": "How does protein folding prediction benefit from ML techniques?", | |
| "answer": "Models like AlphaFold use deep learning to predict 3D structure from sequence, leveraging patterns in known structures.", | |
| "must": ["AlphaFold", "sequence", "structure"] | |
| }, | |
| { | |
| "q": "Name a key challenge in computational chemistry that AI helps address.", | |
| "answer": "Large combinatorial search space of molecules, expensive physics-based simulation, and low-data regimes.", | |
| "must": ["combinatorial", "simulation", "low-data"] | |
| }, | |
| { | |
| "q": "Why are special lectures included in an NLP/AI curriculum?", | |
| "answer": "To expose students to interdisciplinary impact of AI, inspire breadth, and connect theory to frontier areas.", | |
| "must": ["interdisciplinary", "impact", "frontier"] | |
| }, | |
| { | |
| "q": "What is the benefit of linking AI with other sciences (e.g. chemistry, physics)?", | |
| "answer": "It fosters cross-domain innovation, applies methods across fields, and yields richer research directions.", | |
| "must": ["cross-domain", "innovation", "research"] | |
| }, | |
| { | |
| "q": "How might advances in neural architectures from physics insights inform future NLP models?", | |
| "answer": "Physics-inspired architectures (e.g. energy-based models) or optimization techniques may improve model design.", | |
| "must": ["energy-based", "architectures", "optimization"] | |
| }, | |
| { | |
| "q": "What ethical or societal factor arises when AI meets scientific domains?", | |
| "answer": "Issues include reproducibility, interpretability, domain bias, and misuse of results in high-stakes areas.", | |
| "must": ["reproducibility", "interpretability", "bias"] | |
| }, | |
| { | |
| "q": "Give an example of AI-driven discovery in chemistry.", | |
| "answer": "Designing novel molecules, drug discovery, enzyme engineering, materials with desired properties.", | |
| "must": ["molecules", "drug discovery", "enzyme"] | |
| }, | |
| { | |
| "q": "Describe how ML models can approximate expensive physics simulations.", | |
| "answer": "Using surrogate models: neural approximators that learn the mapping from inputs to simulation outputs, reducing cost.", | |
| "must": ["surrogate models", "approximation", "simulation"] | |
| }, | |
| { | |
| "q": "What is transfer learning and how might it apply across domains (e.g. NLP to chemistry)?", | |
| "answer": "Transfer learning reuses pretrained features from one domain to another, aiding low-data tasks.", | |
| "must": ["transfer learning", "pretrained", "low-data"] | |
| }, | |
| { | |
| "q": "Why is model interpretability especially critical in scientific domains?", | |
| "answer": "Because decisions (e.g. drug design) require trust, explanation, and verification beyond black-box predictions.", | |
| "must": ["interpretability", "trust", "verification"] | |
| }, | |
| { | |
| "q": "How do generative models (e.g. GANs, diffusion) relate to scientific applications?", | |
| "answer": "They generate novel samples (e.g. molecules, materials) under constraints learned from data.", | |
| "must": ["generative models", "GAN", "diffusion"] | |
| }, | |
| { | |
| "q": "What is the frontier challenge when combining AI models with physical laws?", | |
| "answer": "Incorporating domain constraints, preserving physical consistency, and ensuring generalization beyond training data.", | |
| "must": ["constraints", "consistency", "generalization"] | |
| }, | |
| { | |
| "q": "What role do these special lectures serve in project ideation?", | |
| "answer": "They spark interdisciplinary project ideas and broaden students’ perspectives beyond pure NLP.", | |
| "must": ["project", "interdisciplinary", "perspective"] | |
| }, | |
| { | |
| "q": "What is prompt engineering?", | |
| "answer": "The practice of designing prompts to guide LLMs to produce desired outputs, influencing behavior and correctness.", | |
| "must": ["prompt engineering", "design", "desired outputs"] | |
| }, | |
| { | |
| "q": "Define zero-shot prompting and few-shot prompting.", | |
| "answer": "Zero-shot gives no examples and relies on the model’s pretrained ability; few-shot includes example pairs to steer output.", | |
| "must": ["zero-shot", "few-shot", "examples"] | |
| }, | |
| { | |
| "q": "What is chain-of-thought prompting?", | |
| "answer": "Using the prompt to ask the model to spell out its reasoning steps explicitly before answering, which improves logical task performance.", | |
| "must": ["chain-of-thought", "reasoning", "prompt"] | |
| }, | |
| { | |
| "q": "Why is prompt phrasing sensitive? Give an example.", | |
| "answer": "Small changes may lead to drastically different behavior; e.g. “List reasons why X” vs “Explain X”.", | |
| "must": ["phrasing", "sensitive", "behavior"] | |
| }, | |
| { | |
| "q": "What is prompt injection and how to guard against it?", | |
| "answer": "Adversarial text input trying to override instructions; guard via input sanitization and robust prompt templates.", | |
| "must": ["prompt injection", "adversarial", "sanitize"] | |
| }, | |
| { | |
| "q": "What is the verbalizer in prompt templates (e.g. in prompt tuning)?", | |
| "answer": "It maps model output tokens back to task labels (e.g. “positive” → label 1).", | |
| "must": ["verbalizer", "mapping", "labels"] | |
| }, | |
| { | |
| "q": "What is prompt tuning / soft prompt?", | |
| "answer": "Learned continuous prompt embeddings prepended to input, rather than discrete text prompts.", | |
| "must": ["prompt tuning", "continuous", "embedding"] | |
| }, | |
| { | |
| "q": "What is instruction tuning?", | |
| "answer": "Training the model with diverse natural language instructions so it generalizes to new tasks.", | |
| "must": ["instruction tuning", "diverse", "generalize"] | |
| }, | |
| { | |
| "q": "What is the role of demonstrations in prompting?", | |
| "answer": "Demonstrations (examples) help the model see the desired input-output mapping and reduce ambiguity.", | |
| "must": ["demonstrations", "mapping", "ambiguity"] | |
| }, | |
| { | |
| "q": "How does few-shot prompting reduce hallucination risk?", | |
| "answer": "By providing examples, it constrains output patterns, reducing freedom to hallucinate unrelated content.", | |
| "must": ["hallucination", "examples", "constrain"] | |
| }, | |
| { | |
| "q": "What is iterative refinement in prompts?", | |
| "answer": "Using multiple prompt stages: ask, review, revise output to improve correctness.", | |
| "must": ["iterative", "refinement", "review"] | |
| }, | |
| { | |
| "q": "What is self-consistency prompting?", | |
| "answer": "Generate multiple reasoning paths and aggregate consensus to improve accuracy.", | |
| "must": ["self-consistency", "multiple paths", "consensus"] | |
| }, | |
| { | |
| "q": "What is a prompt template? Example elements?", | |
| "answer": "A blueprint with placeholders (e.g. {input}, {instruction}, {examples}); ensures structure across prompts.", | |
| "must": ["template", "placeholder", "structure"] | |
| }, | |
| { | |
| "q": "Why is prompt evaluation (A/B testing) important?", | |
| "answer": "To compare prompt effectiveness, measure output quality, and choose best prompt variant.", | |
| "must": ["evaluation", "A/B", "quality"] | |
| }, | |
| { | |
| "q": "What is meta-prompting?", | |
| "answer": "Asking the model to generate or refine prompts itself as part of the pipeline.", | |
| "must": ["meta-prompting", "generate", "refine"] | |
| }, | |
| { | |
| "q": "What is the challenge of prompt generalization across tasks?", | |
| "answer": "A prompt performing well on one task may fail on another; general templates are hard to design.", | |
| "must": ["generalization", "task", "templates"] | |
| }, | |
| { | |
| "q": "How do you combine prompt engineering with fine-tuning?", | |
| "answer": "You can fine-tune with instructions and then further improve via prompting to adapt behavior.", | |
| "must": ["fine-tuning", "prompting", "behavior"] | |
| }, | |
| { | |
| "q": "What is zero-shot chain-of-thought prompting? Use case.", | |
| "answer": "Asking “Let’s think step by step” even without examples to encourage reasoning in zero-shot mode.", | |
| "must": ["zero-shot", "chain-of-thought", "step by step"] | |
| }, | |
| { | |
| "q": "What is an LLM-based Q&A system?", | |
| "answer": "A system that uses a large language model to answer user queries, often retrieving context or documents first.", | |
| "must": ["Q&A", "LLM", "retrieve"] | |
| }, | |
| { | |
| "q": "Why use a vector database in Q&A systems?", | |
| "answer": "To store dense embeddings of documents and support fast similarity search (nearest neighbor).", | |
| "must": ["vector database", "embeddings", "similarity"] | |
| }, | |
| { | |
| "q": "What is FAISS and how is it used?", | |
| "answer": "FAISS is a library for efficient similarity search over large embedding collections. It enables fast vector search.", | |
| "must": ["FAISS", "similarity", "vectors"] | |
| }, | |
| { | |
| "q": "What is retrieval-augmented generation (RAG)?", | |
| "answer": "RAG retrieves relevant documents from a corpus and uses them to condition the generation process, combining retrieval and generation.", | |
| "must": ["retrieval", "generation"] | |
| }, | |
| { | |
| "q": "What is a knowledge graph and what is a triple?", | |
| "answer": "A knowledge graph is a graph of entities and relations; a triple is (subject, predicate, object).", | |
| "must": ["knowledge graph", "triple"] | |
| }, | |
| { | |
| "q": "How do you integrate retrieval and LLM generation in a Q&A pipeline?", | |
| "answer": "Retrieve top-k documents, rank them, then feed them (as context) plus user query into the generation model.", | |
| "must": ["retrieve", "context", "generation"] | |
| }, | |
| { | |
| "q": "What is the role of embeddings in retrieval-based Q&A?", | |
| "answer": "Convert text into vector space so semantically similar texts are close and can be retrieved by similarity search.", | |
| "must": ["embeddings", "vector space", "semantic"] | |
| }, | |
| { | |
| "q": "What is dense retrieval vs sparse retrieval?", | |
| "answer": "Dense uses embeddings and vector similarity; sparse uses term-based indexing (e.g. BM25).", | |
| "must": ["dense retrieval", "sparse retrieval", "BM25"] | |
| }, | |
| { | |
| "q": "What is BM25 scoring?", | |
| "answer": "A term-frequency / inverse-document-frequency based retrieval scoring algorithm for sparse retrieval.", | |
| "must": ["BM25", "term frequency", "inverse document frequency"] | |
| }, | |
| { | |
| "q": "What is reranking in retrieval pipelines?", | |
| "answer": "After initial retrieval, use stronger models (cross-encoders) to re-score and reorder results for accuracy.", | |
| "must": ["reranking", "cross-encoder", "rescore"] | |
| }, | |
| { | |
| "q": "Why limit context length when sending to LLM for Q&A?", | |
| "answer": "Because LLMs have finite context windows; too much text may exceed limit or dilute signal.", | |
| "must": ["context length", "limit", "signal"] | |
| }, | |
| { | |
| "q": "What is hallucination in LLM-based Q&A?", | |
| "answer": "When the model generates plausible but incorrect or unsupported information.", | |
| "must": ["hallucination", "incorrect", "unsupported"] | |
| }, | |
| { | |
| "q": "How to reduce hallucination in RAG systems?", | |
| "answer": "Use verified documents, answer filtering, attribution, or grounding mechanisms.", | |
| "must": ["grounding", "filtering", "attribution"] | |
| }, | |
| { | |
| "q": "What is a system integration challenge in deployment of Q&A?", | |
| "answer": "Latency, scaling embedding search, prompt packaging, context management, error handling.", | |
| "must": ["latency", "scaling", "context"] | |
| }, | |
| { | |
| "q": "How to evaluate Q&A system performance?", | |
| "answer": "Metrics: accuracy, F1, exact match, human judgment, response time, user satisfaction.", | |
| "must": ["accuracy", "F1", "exact match"] | |
| }, | |
| { | |
| "q": "What is context window sliding or chunking for long documents?", | |
| "answer": "Break documents into overlapping windows or chunks and retrieve best chunks as context.", | |
| "must": ["chunking", "sliding window", "overlap"] | |
| }, | |
| { | |
| "q": "How do you handle conflicting retrieved evidence?", | |
| "answer": "Use reranking, conflict resolution heuristics, or ask LLM to reconcile contradictions.", | |
| "must": ["conflict", "reranking", "reconcile"] | |
| }, | |
| { | |
| "q": "What is multi-hop Q&A? Give an example.", | |
| "answer": "A question whose answer requires chaining reasoning across multiple documents or facts (e.g. “Where was the author of book X born?”).", | |
| "must": ["multi-hop", "chain", "reasoning"] | |
| }, | |
| { | |
| "q": "Why is caching helpful in Q&A systems?", | |
| "answer": "To reuse prior retrieval/generation results, reduce latency, and save API cost.", | |
| "must": ["caching", "latency", "cost"] | |
| }, | |
| { | |
| "q": "What is the cold-start issue in Q&A systems?", | |
| "answer": "When new topics lack indexed documents or embeddings, making retrieval ineffective initially.", | |
| "must": ["cold-start", "new topics", "ineffective"] | |
| }, | |
| { | |
| "q": "What is Flask and how is it used in web development for NLP apps?", | |
| "answer": "Flask is a lightweight Python web framework; in NLP apps it's used to create endpoints (APIs) that call LLM or model backends.", | |
| "must": ["Flask", "Python", "endpoint"] | |
| }, | |
| { | |
| "q": "What is Streamlit and when is it preferred?", | |
| "answer": "Streamlit is a framework for quickly building data apps with minimal code, useful for dashboards and prototyping.", | |
| "must": ["Streamlit", "dashboard", "prototyping"] | |
| }, | |
| { | |
| "q": "Explain client-server architecture in a web app.", | |
| "answer": "Client (browser) sends requests to server; server processes, possibly invoking ML models, returns responses.", | |
| "must": ["client", "server", "requests"] | |
| }, | |
| { | |
| "q": "How do you integrate LLM API calls into a Flask route?", | |
| "answer": "Within a route handler, accept user input, call LLM API (with prompt), and return response via JSON/HTML.", | |
| "must": ["route", "LLM API", "response"] | |
| }, | |
| { | |
| "q": "What is CORS and why does it matter in web apps?", | |
| "answer": "Cross-Origin Resource Sharing controls which domains can call your API; misconfigured CORS may block access.", | |
| "must": ["CORS", "domains", "access"] | |
| }, | |
| { | |
| "q": "What is deployment (production) vs development mode?", | |
| "answer": "Production uses secure, scalable config (e.g. Gunicorn, HTTPS), while development is simpler and local.", | |
| "must": ["production", "development", "secure"] | |
| }, | |
| { | |
| "q": "What is containerization (Docker) in deployment?", | |
| "answer": "Encapsulating app + dependencies in container for portability, reproducibility, and isolation.", | |
| "must": ["Docker", "containerization", "portability"] | |
| }, | |
| { | |
| "q": "How do you secure API keys in web apps?", | |
| "answer": "Use environment variables, secrets management, never embed keys in frontend or version control.", | |
| "must": ["API keys", "environment variables", "secrets"] | |
| }, | |
| { | |
| "q": "What is input validation in web apps?", | |
| "answer": "Sanitize user input to prevent injection attacks (SQL, prompt, HTML) and ensure correct formats.", | |
| "must": ["validation", "sanitize", "injection"] | |
| }, | |
| { | |
| "q": "How to handle errors and exceptions in web APIs?", | |
| "answer": "Use try/except, return HTTP error codes (e.g. 400, 500), log errors, and provide safe defaults.", | |
| "must": ["error", "HTTP code", "logging"] | |
| }, | |
| { | |
| "q": "What is asynchronous request handling (async) in web frameworks?", | |
| "answer": "Using asynchronous handlers to process I/O-bound tasks (like API calls) without blocking server threads.", | |
| "must": ["asynchronous", "I/O", "non-blocking"] | |
| }, | |
| { | |
| "q": "Why use caching on the server side?", | |
| "answer": "To store responses for repeated inputs, reduce API calls, and improve performance.", | |
| "must": ["caching", "performance", "reduce"] | |
| }, | |
| { | |
| "q": "What are environment variables and config separation?", | |
| "answer": "Store sensitive settings (e.g. API endpoints, keys) outside code, loaded at runtime via config files or env vars.", | |
| "must": ["environment variables", "config", "sensitive"] | |
| }, | |
| { | |
| "q": "Explain how to deploy a Flask app to a cloud service (e.g. Heroku, AWS).", | |
| "answer": "Push code, set env vars, configure web server (Gunicorn), set up domain/SSL, manage scaling.", | |
| "must": ["deploy", "Heroku", "Gunicorn"] | |
| }, | |
| { | |
| "q": "How would you containerize and deploy an NLP web app with Docker + Kubernetes?", | |
| "answer": "Write Dockerfile, build image, deploy to cluster, manage scaling, load balancing, config maps.", | |
| "must": ["Kubernetes", "load balancing", "scaling"] | |
| }, | |
| { | |
| "q": "What is rate limiting and why is it useful for APIs?", | |
| "answer": "Throttle request rates to protect backend from overload or abuse (e.g. too many LLM calls).", | |
| "must": ["rate limiting", "throttle", "protect"] | |
| }, | |
| { | |
| "q": "What is logging and monitoring in deployed web apps?", | |
| "answer": "Track requests, errors, latency; use tools (e.g. Prometheus, Grafana) to observe system health.", | |
| "must": ["logging", "monitoring", "latency"] | |
| }, | |
| { | |
| "q": "What is reverse proxy (e.g. Nginx) and how is it used?", | |
| "answer": "Proxy server in front of app server to handle SSL, load balancing, URL routing, static files.", | |
| "must": ["reverse proxy", "Nginx", "routing"] | |
| }, | |
| { | |
| "q": "What is output structuring in LLM systems?", | |
| "answer": "Designing prompts or constraints so model outputs follow a desired structured format (JSON, XML, template).", | |
| "must": ["structuring", "format", "template"] | |
| }, | |
| { | |
| "q": "What is a template-based output? Give an example.", | |
| "answer": "Predefined skeleton with placeholders, e.g. {\"title\": __, \"summary\": __}, forcing consistent layout.", | |
| "must": ["template", "placeholder", "layout"] | |
| }, | |
| { | |
| "q": "How do JSON and XML formatting help in structured outputs?", | |
| "answer": "They provide machine-readable, schema-based formats that allow downstream parsing and validation.", | |
| "must": ["JSON", "XML", "schema"] | |
| }, | |
| { | |
| "q": "Why use markdown or other markup languages in LLM output?", | |
| "answer": "They enable readable rich text (headings, lists, tables) while being parseable by applications.", | |
| "must": ["markdown", "markup", "readable"] | |
| }, | |
| { | |
| "q": "What is controlling temperature and sampling in output control?", | |
| "answer": "Adjusting temperature or top-k/top-p changes output randomness, helping enforce more predictable structure.", | |
| "must": ["temperature", "sampling", "control"] | |
| }, | |
| { | |
| "q": "What are stop sequences and length constraints in output control?", | |
| "answer": "Stop sequences specify termination tokens; length constraints (min/max) bound output size.", | |
| "must": ["stop sequence", "length constraint", "termination"] | |
| }, | |
| { | |
| "q": "What strategies enforce output formats (e.g. JSON)?", | |
| "answer": "Prompt templates, post-checking & correction, mask tokens, few-shot examples with format enforcement.", | |
| "must": ["format enforcement", "post-checking", "mask"] | |
| }, | |
| { | |
| "q": "How do you validate structured output from LLMs?", | |
| "answer": "Using JSON/XML schema validation, regex checks, fallback logic, or retries upon malformed output.", | |
| "must": ["validation", "schema", "fallback"] | |
| }, | |
| { | |
| "q": "What error-handling techniques apply when output is malformed?", | |
| "answer": "Retry generation, fallback to simpler prompt, error correction heuristics, or human fallback.", | |
| "must": ["error-handling", "retry", "fallback"] | |
| }, | |
| { | |
| "q": "What is the trade-off between strict formatting and natural response flexibility?", | |
| "answer": "Strict format reduces flexibility or fluency; flexible responses risk invalid structure or inconsistency.", | |
| "must": ["trade-off", "fluency", "structure"] | |
| }, | |
| { | |
| "q": "How to parse LLM output reliably in applications?", | |
| "answer": "Use robust parsers tolerant to minor deviations, validate, and fallback to repair routines.", | |
| "must": ["parse", "robust", "repair"] | |
| }, | |
| { | |
| "q": "Why is format consistency important in multi-turn systems?", | |
| "answer": "To maintain structured data across turns, avoid parser failures, and simplify downstream logic.", | |
| "must": ["consistency", "multi-turn", "parser"] | |
| }, | |
| { | |
| "q": "What is a fallback mechanism for malformed outputs?", | |
| "answer": "If parsing fails, revert to plain text, ask model to reformat, or return default value.", | |
| "must": ["fallback", "parsing fails", "default"] | |
| }, | |
| { | |
| "q": "What does advanced output control cover beyond fundamentals?", | |
| "answer": "Techniques like dynamic templates, in-prompt validators, constrained decoding, and self-checking loops.", | |
| "must": ["constrained decoding", "self-checking", "dynamic template"] | |
| }, | |
| { | |
| "q": "How do you build a system for structured output in a project?", | |
| "answer": "Design templates, integrate validation modules, handle errors, and support multiple formats (JSON, XML).", | |
| "must": ["system", "validation module", "multiple formats"] | |
| }, | |
| { | |
| "q": "What is the role of format enforcement in real-world applications?", | |
| "answer": "Ensures integrations (APIs, downstream modules) can reliably parse and consume outputs.", | |
| "must": ["format enforcement", "integration", "parse"] | |
| }, | |
| { | |
| "q": "How can you combine prompting and programmatic postprocessing?", | |
| "answer": "Prompt to encourage format, then parse output and correct or sanitize via code where needed.", | |
| "must": ["postprocessing", "sanitize", "prompt"] | |
| }, | |
| { | |
| "q": "What is the assignment objective for Week 12?", | |
| "answer": "Implement multiple output structuring methods, validate formats, handle errors, and build parsers.", | |
| "must": ["output structuring", "validation", "parsers"] | |
| }, | |
| { | |
| "q": "Give an example of enforcing stop sequences to prevent extraneous text.", | |
| "answer": "Include “<END>” marker in template and set it as stop sequence so model halts output beyond it.", | |
| "must": ["stop sequence", "marker", "halt"] | |
| }, | |
| { | |
| "q": "What is format drift and how to guard against it?", | |
| "answer": "When the model’s output format changes over time; guard via validation, in-prompt examples, and checks on outputs.", | |
| "must": ["format drift", "validation", "checks"] | |
| }, | |
| { | |
| "q": "How do you design fallback strategies for structuring failures?", | |
| "answer": "Combine retries, simpler prompts, fallback parsers, or human review when structure fails repeatedly.", | |
| "must": ["fallback", "retries", "review"] | |
| } | |
| ] |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment