{
  "FEATURE_METADATA": {
    "feature_name": "AI Intent Phrase Enhancement & Entity-Aware Training Data Builder",
    "module_path": "modules/phraseEnhancer",
    "version": "1.0.0",
    "status": "blueprint",
    "depends_on": [
      "modules/dialogflow/models/Intent.js",
      "modules/dialogflow/models/Agent.js",
      "modules/dialogflow/models/Entity.js",
      "modules/agentArchitect/services/modelConnector.service.js",
      "modules/dialogflow/services/publish.service.js"
    ],
    "new_components": [
      "PhraseEnhancerJob (MongoDB model)",
      "PhraseEnhancerVersion (MongoDB model)",
      "phraseInterpreter.service.js",
      "existingIntentFetcher.service.js",
      "userPhraseParser.service.js",
      "entityDescriptionParser.service.js",
      "phraseGenerationEngine.service.js",
      "entityAwarePhraseEngine.service.js",
      "phraseDiversityEngine.service.js",
      "phraseQualityValidator.service.js",
      "intentBoundaryGuard.service.js",
      "duplicateNoiseFilter.service.js",
      "phraseMergeStrategy.service.js",
      "qaTestPhraseGenerator.service.js",
      "phraseVersioning.service.js",
      "phraseOrchestrator.service.js",
      "phraseEnhancer.controller.js",
      "phraseEnhancer.routes.js"
    ],
    "reuses_without_change": [
      "modules/agentArchitect/services/modelConnector.service.js",
      "modules/agentArchitect/services/jobQueue.service.js",
      "modules/dialogflow/middleware/validation.js"
    ],
    "api_base_path": "/phrase-enhancer",
    "route_mount": "app.use('/phrase-enhancer', phraseEnhancerRouter)"
  },

  "FEATURE_OVERVIEW": {
    "summary": "AI Intent Phrase Enhancement is a backend feature that takes a user's natural-language improvement request and uses a multi-stage generative AI pipeline to produce, validate, and deliver a production-ready phrase set for any intent. The system reads user-provided example phrases, entity descriptions, and the current intent state — then generates improved, categorized, entity-aware, diversity-checked, intent-boundary-protected training phrases. The output is previewed before save, supports merge/replace strategies, and includes QA test phrase generation.",
    "what_it_replaces": "Manual phrase authoring, hand-crafted entity-aware examples, ad-hoc phrase diversity work.",
    "what_it_does_not_replace": "Core dialogue runtime (dialogRuntime.service.js), intent matching pipeline, entity extraction pipeline — all unchanged.",
    "plug_in_point": "Phrases generated by this feature are saved as DfIntent.trainingPhrases[] via the existing publish.service.js pipeline, identical to how manually-authored phrases are stored and indexed."
  },

  "SYSTEM_OBJECTIVES": [
    "Accept any phrase improvement request — complaint-only, examples-only, entities-only, or combined.",
    "Automatically detect whether the request is CREATE_NEW, IMPROVE, CLEANUP, TIGHTEN, EXPAND, or other task mode.",
    "Load existing intent phrases and entity definitions from DB to inform generation.",
    "Parse user-provided phrase examples and extract structural patterns from them.",
    "Parse user-provided entity descriptions and understand slot semantics.",
    "Generate base phrase candidates covering direct, conversational, formal, casual, and indirect phrasing.",
    "Generate entity-aware phrases: single-entity, multi-entity, entity-order variations, optional-entity variations.",
    "Protect intent semantic boundary — detect and exclude phrases that drift into nearby intents.",
    "Validate phrase packs: duplicates, near-duplicates, too-generic, out-of-scope, entity coverage gaps.",
    "Score phrase pack quality across 9 dimensions. Auto-refine if score below threshold.",
    "Preview generated phrases grouped by category before save.",
    "Support merge strategies: replace_all, merge_new, add_only, regenerate_weak, preserve_locked.",
    "Generate QA test phrases: positive examples, hard negatives, entity extraction examples, boundary tests.",
    "Maintain full job history and phrase version diffs for audit."
  ],

  "SUPPORTED_USER_REQUESTS": [
    { "id": "UR-01", "mode": "IMPROVE_EXISTING_INTENT_PHRASES",   "example": "This intent has weak phrases, improve it" },
    { "id": "UR-02", "mode": "IMPROVE_EXISTING_INTENT_PHRASES",   "example": "This intent is not being detected correctly, improve phrases" },
    { "id": "UR-03", "mode": "IMPROVE_EXISTING_INTENT_PHRASES",   "example": "Here are 5 example phrases, create 50 more like them" },
    { "id": "UR-04", "mode": "CREATE_NEW_INTENT_PHRASES",         "example": "This intent has no good phrases, generate a full set" },
    { "id": "UR-05", "mode": "GENERATE_ENTITY_AWARE_PHRASES",     "example": "Use these entities in training phrases" },
    { "id": "UR-06", "mode": "GENERATE_ENTITY_AWARE_PHRASES",     "example": "These phrases should detect phone number + website together" },
    { "id": "UR-07", "mode": "EXPAND_MULTI_SLOT_PHRASES",         "example": "Generate phrases where user gives business name, category, and phone together" },
    { "id": "UR-08", "mode": "TIGHTEN_INTENT_BOUNDARY",           "example": "Do not make the intent broad. Keep it only for campaign analytics" },
    { "id": "UR-09", "mode": "GENERATE_MULTILINGUAL_PHRASES",     "example": "Add Hinglish phrases too" },
    { "id": "UR-10", "mode": "GENERATE_SHORT_REPLY_PHRASES",      "example": "Add short user reply phrases for slot-filling context" },
    { "id": "UR-11", "mode": "CLEANUP_DUPLICATE_PHRASES",         "example": "Remove duplicate and overlapping phrases from this intent" },
    { "id": "UR-12", "mode": "PHRASE_GAP_ANALYSIS",               "example": "Identify what phrase patterns are missing from this intent" },
    { "id": "UR-13", "mode": "PHRASE_SET_REFINEMENT",             "example": "Rebuild this intent training data using these examples and entity descriptions" },
    { "id": "UR-14", "mode": "IMPROVE_EXISTING_INTENT_PHRASES",   "example": "Add more phrases for manual onboarding flow" },
    { "id": "UR-15", "mode": "IMPROVE_EXISTING_INTENT_PHRASES",   "example": "Add formal + casual phrase variants" },
    { "id": "UR-16", "mode": "GENERATE_ENTITY_AWARE_PHRASES",     "example": "This intent should capture both website and Facebook account status" },
    { "id": "UR-17", "mode": "IMPROVE_EXISTING_INTENT_PHRASES",   "example": "This chatbot trigger intent needs more natural user phrasing" },
    { "id": "UR-18", "mode": "EXPAND_MULTI_SLOT_PHRASES",         "example": "Add phrases for template intent covering quick reply, URL button, and placeholder" }
  ],

  "USER_EXPERIENCE_FLOW": {
    "steps": [
      "1. User selects target agent from dropdown",
      "2. User selects target intent from that agent's intent list",
      "3. UI shows current phrase count + current phrase quality score (if previously scored)",
      "4. User types improvement request in text box (free text)",
      "5. Optional: User pastes example phrases in textarea (one per line)",
      "6. Optional: User adds entity descriptions (name + description pairs)",
      "7. Optional: User sets desired_phrase_count, language_mode, tone_mode, merge_strategy",
      "8. Optional: User selects nearby_competing_intents to protect against",
      "9. User clicks 'AI Enhance Phrases'",
      "10. System queues background job, returns jobId",
      "11. UI polls GET /phrase-enhancer/jobs/:jobId/status (stage name + percent_complete)",
      "12. On completion: UI loads GET /phrase-enhancer/jobs/:jobId/preview",
      "13. Preview shows phrase categories: direct, conversational, entity-rich, short-reply, multilingual",
      "14. Preview shows removed duplicates, conflict-risk notes, quality score breakdown",
      "15. User can: Accept All, Accept by Category, Reject & Regenerate Section, Edit Individual Phrases",
      "16. User clicks 'Save to Intent' → merge strategy applied → DfIntent updated → re-publish"
    ],
    "conflict_risk_display": "Phrases flagged as risk-of-overlap with nearby intents are shown with a warning icon and the competing intent name.",
    "category_toggle": "User can toggle visibility per phrase category before saving — e.g. accept all but multilingual phrases."
  },

  "HIGH_LEVEL_ARCHITECTURE": {
    "entry_point": "POST /phrase-enhancer/jobs → PhraseEnhancerController",
    "layer_1_interpret": "PhraseInterpreter → detect mode, extract intent target, detect user examples + entity hints",
    "layer_2_context":   "ExistingIntentFetcher → load DfIntent + DfAgent + nearby intents from DB",
    "layer_3_parse":     "UserPhraseParser + EntityDescriptionParser → structured representations",
    "layer_4_analyze":   "WeaknessAnalyzer → score current phrases, find gaps",
    "layer_5_generate":  "PhraseGenerationEngine + EntityAwarePhraseEngine → raw phrase candidates",
    "layer_6_protect":   "IntentBoundaryGuard → mark/remove phrases that drift to nearby intents",
    "layer_7_validate":  "PhraseQualityValidator + DuplicateNoiseFilter → validation report + clean set",
    "layer_8_assemble":  "PhraseDiversityEngine → categorize, balance, finalize phrase pack",
    "layer_9_qa":        "QATestPhraseGenerator → positive tests, hard negatives, boundary tests",
    "layer_10_preview":  "VersioningLayer → store draft version, expose preview API",
    "layer_11_save":     "PhraseMergeStrategy → on approval: merge into DfIntent → publishService re-publishes",
    "job_queue":         "Reuses modules/agentArchitect/services/jobQueue.service.js (no duplicate infrastructure)",
    "model_connector":   "Reuses modules/agentArchitect/services/modelConnector.service.js"
  },

  "CORE_MODULES": {
    "M01_PhraseInterpreter": {
      "file": "modules/phraseEnhancer/services/phraseInterpreter.service.js",
      "responsibility": "Parse the user's free-text improvement request. Detect task mode, identify target agent/intent (if named), extract inline phrase examples, detect entity mentions, detect constraints (strict_boundary, language_mode, tone_mode, desired count).",
      "inputs": "{ prompt_text, agentId?, intentId?, example_phrases?, entity_descriptions? }",
      "outputs": "{ mode: TaskMode, intentTarget: { agentId, intentId }, userExamples: [{text}], entityHints: [{name, description}], constraints: { strict_boundary, language_mode, tone_mode, desired_count }, confidence }",
      "fallback": "Keyword detection if model unavailable"
    },
    "M02_ExistingIntentFetcher": {
      "file": "modules/phraseEnhancer/services/existingIntentFetcher.service.js",
      "responsibility": "Load the target DfIntent from DB including all existing trainingPhrases, parameters, inputContexts, outputContexts. Also load all other intents in the same agent for boundary-protection context. Load DfEntity records for entity-aware generation.",
      "outputs": "{ intent: DfIntent, existingPhrases: [{text}], agent: DfAgent, entities: DfEntity[], allIntentNames: string[], nearbyIntents: [{ name, phrasesSample }] }",
      "note": "nearbyIntents = the top 5 intents with most similar names or shared keywords — used for boundary protection"
    },
    "M03_UserPhraseParser": {
      "file": "modules/phraseEnhancer/services/userPhraseParser.service.js",
      "responsibility": "Parse raw user-provided example phrases. Detect structural patterns: which entities are used, ordering patterns, sentence structure (subject-verb-object vs short-reply), entity density, language mix.",
      "inputs": "example_phrases: string[] (raw lines or JSON array)",
      "outputs": "ParsedUserExamples { phrases: [{text, entities_found: [], pattern_type: 'direct|conversational|slot_fill|multi_slot|problem_report', language: 'en|hi|mixed', word_count}], detected_entity_patterns: [], structural_patterns: [], language_distribution: {} }",
      "note": "Uses lightweight regex + entity name scan — no model call required"
    },
    "M04_EntityDescriptionParser": {
      "file": "modules/phraseEnhancer/services/entityDescriptionParser.service.js",
      "responsibility": "Parse user-provided entity descriptions into structured entity slot specs. Infer entity type (person, location, date, phone, URL, category, etc.). Generate slot placeholder names (e.g. {business_name}). Build entity combination matrix for multi-slot generation.",
      "inputs": "entity_descriptions: [{ name, description, examples? }]",
      "outputs": "ParsedEntityDescriptions { entities: [{ name, placeholder, inferred_type, examples, is_required, is_optional }], combination_matrix: [[e1, e2], [e1, e2, e3], ...], slot_ordering_variants: [] }",
      "note": "combination_matrix drives multi-slot phrase generation. All combinations up to 4 entities."
    },
    "M05_WeaknessAnalyzer": {
      "file": "part of phraseOrchestrator.service.js",
      "responsibility": "Inspect existing phrase set and score it across 9 dimensions. Identify: missing phrase types, entity coverage gaps, over-represented patterns, phrase distribution imbalance, short-reply absence, multilingual absence (if requested), duplicate/near-duplicate clusters.",
      "outputs": "WeaknessReport { scores: {}, gaps: [], recommendations: [], phrase_type_distribution: {}, entity_coverage: {}, duplicate_clusters: [] }"
    },
    "M06_PhraseGenerationEngine": {
      "file": "modules/phraseEnhancer/services/phraseGenerationEngine.service.js",
      "responsibility": "Core phrase generation. Takes intent description + parsed user examples + weakness analysis → calls LLM to generate phrase candidates organized by category. Categories: direct, conversational, formal, casual, paraphrase, indirect, problem_report, follow_up_continuation.",
      "ai_role": "Phrase Expansion Specialist",
      "generation_rules": [
        "Minimum 20 phrases per generation call",
        "At least 3 direct phrasing variants",
        "At least 3 conversational variants",
        "At least 3 casual short-command variants",
        "At least 5 paraphrase variants",
        "Include typo variants for 10-15% of phrases",
        "No phrase should be a copy of a user-provided example — always extend/vary",
        "Phrases must not exceed 120 characters each"
      ]
    },
    "M07_EntityAwarePhraseEngine": {
      "file": "modules/phraseEnhancer/services/entityAwarePhraseEngine.service.js",
      "responsibility": "Generate entity-rich phrase variants using the parsed entity descriptions. Uses the combination_matrix to generate single-entity, dual-entity, and multi-entity phrases. Generates both ordered and unordered entity combinations. Generates entity-optional phrases (entity mentioned inline vs implied).",
      "ai_role": "Entity-Aware Training Data Designer",
      "generation_rules": [
        "Generate at least 3 phrases per entity combination",
        "Generate slot-fill context short-replies (e.g. just the entity value)",
        "Generate entity-in-middle, entity-at-end, entity-at-start variants",
        "Generate natural language wrapping for each entity",
        "Never generate phrases where entity is forced awkwardly",
        "Generate optional-entity variants (phrase makes sense with or without entity)"
      ],
      "output_schema": "{ entity_phrase_sets: [{ entity_combo: ['@entity1', '@entity2'], phrases: [{text, slot_positions: []}] }] }"
    },
    "M08_IntentBoundaryGuard": {
      "file": "modules/phraseEnhancer/services/intentBoundaryGuard.service.js",
      "responsibility": "Protect intent semantic scope. Receives full candidate phrase set + nearby competing intent names/phrases. Runs boundary scoring: any phrase scoring > 0.70 cosine similarity to a competing intent's training phrases is flagged as high_risk. Phrases scoring 0.50-0.70 are flagged as medium_risk. Provides fix_suggestion for each risk.",
      "method": "Compute embedding similarity (USE 512-dim via embeddingEngine.js) between each candidate phrase and top phrases of competing intents.",
      "outputs": "ConflictRiskReport { high_risk: [{phrase, competing_intent, similarity, fix_suggestion}], medium_risk: [{...}], clean_phrases: [{text}] }",
      "note": "Reuses embeddingEngine.js — zero new dependency"
    },
    "M09_PhraseQualityValidator": {
      "file": "modules/phraseEnhancer/services/phraseQualityValidator.service.js",
      "responsibility": "Run all 12 validation checks on the candidate phrase set. Returns ValidationReport with pass/fail per check and severity.",
      "checks": [
        { "id": "V-PH-01", "name": "Minimum phrase count",       "severity": "error",   "rule": ">= 15 non-duplicate phrases required" },
        { "id": "V-PH-02", "name": "Zero identical duplicates",  "severity": "error",   "rule": "No two phrases with identical text (case-insensitive)" },
        { "id": "V-PH-03", "name": "Near-duplicate rate",        "severity": "warning", "rule": "Near-duplicate clusters (>85% word overlap) must be < 10% of total" },
        { "id": "V-PH-04", "name": "Generic phrase detection",   "severity": "warning", "rule": "Phrases shorter than 3 words (other than intentional short_reply/slot_fill phrases, see V-PH-12) or containing only stopwords are flagged as too generic" },
        { "id": "V-PH-05", "name": "Intent scope drift",         "severity": "error",   "rule": "Zero high_risk phrases from IntentBoundaryGuard in final set" },
        { "id": "V-PH-06", "name": "Entity coverage",            "severity": "warning", "rule": "Each defined entity must appear in at least 5 phrases" },
        { "id": "V-PH-07", "name": "Multi-slot coverage",        "severity": "warning", "rule": "If agent has multi-entity intents: >= 5 multi-slot phrases required" },
        { "id": "V-PH-08", "name": "Direct phrase coverage",     "severity": "warning", "rule": ">= 10% of phrases must be direct intent expressions (not paraphrases)" },
        { "id": "V-PH-09", "name": "Conversational coverage",    "severity": "warning", "rule": ">= 15% of phrases must be conversational (question form or casual)" },
        { "id": "V-PH-10", "name": "Competing intent overlap",   "severity": "error",   "rule": "< 5% of phrases are medium_risk (cosine similarity 0.50-0.70) with any nearby intent; phrases above 0.70 are high_risk and must already be absent per V-PH-05" },
        { "id": "V-PH-11", "name": "Language quality",           "severity": "error",   "rule": "No malformed, garbled, or nonsensical phrases (deliberate typo variants produced by PhraseGenerationEngine are exempt)" },
        { "id": "V-PH-12", "name": "Short reply coverage",       "severity": "warning", "rule": ">= 3 short-reply phrases (1-4 words) for slot-filling continuation context" }
      ]
    },
    "M10_DuplicateNoiseFilter": {
      "file": "modules/phraseEnhancer/services/duplicateNoiseFilter.service.js",
      "responsibility": "Remove exact duplicates. Cluster near-duplicates (>85% word overlap) and keep only the best representative. Remove too-generic phrases (< 3 meaningful words), except intentional short_reply/slot_fill phrases. Remove phrases with excessive stopword ratio. Remove any phrase already in existing intent phrases if merge_strategy is add_only.",
      "method_exact_dup": "Lowercase + strip punctuation → Set deduplication",
      "method_near_dup": "Jaccard similarity on word bigrams. Threshold 0.85.",
      "method_generic": "TF-IDF-style stopword ratio check. If >70% stopwords → flagged.",
      "output": "FilterReport { kept: [{text}], removed: [{text, reason, category}] }"
    },
    "M11_PhraseDiversityEngine": {
      "file": "modules/phraseEnhancer/services/phraseDiversityEngine.service.js",
      "responsibility": "Assemble and balance the final phrase pack from all generators. Ensures no single phrase category is over-represented. Ensures entity-rich phrases are distributed (not all at end). Assigns phrases to categories for preview display. Computes final quality score.",
      "category_targets": {
        "direct": "15-20%",
        "conversational": "20-25%",
        "entity_rich": "20-25%",
        "multi_slot": "10-15% (if entities defined)",
        "short_reply": "8-10%",
        "paraphrase": "10-15%",
        "formal": "5-8%",
        "casual": "5-8%",
        "multilingual": "0-15% (if language_mode includes Hindi/Hinglish)",
        "problem_report": "3-5%"
      }
    },
    "M12_PhraseMergeStrategy": {
      "file": "modules/phraseEnhancer/services/phraseMergeStrategy.service.js",
      "responsibility": "Apply the user's chosen merge_strategy when saving final phrases to the DfIntent. Handles locked_phrases (user-marked, never overwrite). Returns the final DfIntent-compatible trainingPhrases[] array.",
      "strategies": {
        "replace_all": "Discard all current phrases. Use only the generated set.",
        "merge_new": "Keep all current phrases. Add only generated phrases not already present.",
        "add_only": "Add generated phrases, never modify existing.",
        "regenerate_weak": "Remove phrases flagged as near-duplicate or generic from current set. Add all generated phrases.",
        "preserve_locked": "Same as merge_new, but never modify or remove phrases listed in locked_phrases in the job request."
      },
      "output": "{ merged_phrases: [{text}], added_count: N, removed_count: N, preserved_count: N, summary: string }"
    },
    "M13_PhraseOrchestrator": {
      "file": "modules/phraseEnhancer/services/phraseOrchestrator.service.js",
      "responsibility": "Coordinates all 10 pipeline stages. Manages stage progression, DB updates after each stage, error handling, and refinement loop. Calls all sub-services in the correct order. Determines if auto-refinement is needed based on quality score.",
      "refinement_trigger": "quality_score < 65 OR ValidationReport has errors after Stage 7",
      "max_refinement_iterations": 2
    },
    "M14_QATestPhraseGenerator": {
      "file": "modules/phraseEnhancer/services/qaTestPhraseGenerator.service.js",
      "responsibility": "Generate QA test data for the improved intent. Produces: positive test phrases (clearly in-scope), hard negatives (similar but should NOT match), boundary tests (phrases near the scope edge), entity extraction examples (phrases with annotated entity values), confusion examples (phrases users might expect to match but shouldn't).",
      "ai_role": "QA Test Designer",
      "outputs": "QATestPack { positive_tests: [{text, expected_intent, expected_entities}], hard_negatives: [{text, should_not_match: intentName, reason}], boundary_tests: [{text, expected_intent, risk_level}], entity_extraction_tests: [{text, entities: {name: value}}] }"
    },
    "M15_VersioningLayer": {
      "file": "modules/phraseEnhancer/services/phraseVersioning.service.js",
      "responsibility": "Store each generation iteration as a PhraseEnhancerVersion. Support diff between versions. Support rollback. Expose via preview API.",
      "version_states": ["DRAFT", "REFINED", "VALIDATED", "HUMAN_APPROVED", "SAVED_TO_INTENT"]
    },
    "M16_PhraseEnhancerController": {
      "file": "modules/phraseEnhancer/controllers/phraseEnhancer.controller.js",
      "responsibility": "HTTP controller for all phrase enhancer endpoints."
    }
  },

  "REQUEST_PAYLOAD_MODEL": {
    "description": "POST /phrase-enhancer/jobs request body",
    "required_fields": ["agentId", "intentId", "prompt_text"],
    "optional_fields_all_handled_intelligently": true,
    "schema": {
      "agentId":                   "ObjectId — target DfAgent",
      "intentId":                  "ObjectId | null — target DfIntent; the key is required, but pass null to create a new phrase set",
      "prompt_text":               "string — free-text improvement request (required)",
      "example_phrases_from_user": "string[] — raw example phrases provided by user (optional)",
      "entity_descriptions": [
        { "name": "string — entity name (e.g. business_name)", "description": "string — what this entity represents", "examples": "string[] — example values (optional)" }
      ],
      "desired_phrase_count":    "number — target total phrase count (default: 50)",
      "language_mode":           "enum: english | hindi | hinglish | mixed (default: english)",
      "tone_mode":               "enum: formal | casual | mixed (default: mixed)",
      "merge_strategy":          "enum: replace_all | merge_new | add_only | regenerate_weak | preserve_locked (default: merge_new)",
      "strict_intent_boundary":  "boolean — if true, apply strict boundary protection and generate fewer edge-case phrases (default: true)",
      "nearby_competing_intents": "ObjectId[] — intent IDs to explicitly protect against (optional, auto-detected if not provided)",
      "current_issues":          "string — description of what is wrong with current phrases (optional)",
      "locked_phrases":          "string[] — phrases to never remove (used with preserve_locked strategy)",
      "include_qa_pack":         "boolean — generate QA test phrases after completion (default: false)"
    }
  },

  "INTERNAL_AI_PIPELINE": {
    "stages": [
      {
        "stage": 1,
        "name": "UNDERSTAND",
        "ai_role": "Phrase Improvement Classifier",
        "model_call": true,
        "input": "prompt_text + agentId + intentId",
        "output": "{ mode, targetIntentId, userExamples, entityHints, constraints }",
        "prompt_template_key": "UNDERSTAND_IMPROVEMENT_REQUEST",
        "max_tokens": 400,
        "fallback": "keyword classification"
      },
      {
        "stage": 2,
        "name": "PARSE_EXAMPLES",
        "ai_role": "none — programmatic",
        "model_call": false,
        "input": "example_phrases_from_user",
        "output": "ParsedUserExamples: patterns, entity occurrences, language distribution",
        "note": "Regex + entity name scan only. No model call needed."
      },
      {
        "stage": 3,
        "name": "PARSE_ENTITIES",
        "ai_role": "none — programmatic",
        "model_call": false,
        "input": "entity_descriptions",
        "output": "ParsedEntityDescriptions: slots, combination_matrix",
        "note": "Programmatic parsing of entity name/description pairs."
      },
      {
        "stage": 4,
        "name": "ANALYZE_WEAKNESS",
        "ai_role": "Phrase Quality Auditor",
        "model_call": "partial",
        "input": "existing_phrases + ParsedUserExamples + ParsedEntityDescriptions",
        "output": "WeaknessReport: gaps, missing types, entity coverage holes",
        "prompt_template_key": "ANALYZE_EXISTING_PHRASES",
        "max_tokens": 800,
        "programmatic_part": "DuplicateNoiseFilter + V-PH-01 to V-PH-12 structural checks"
      },
      {
        "stage": 5,
        "name": "GENERATE_BASE_PHRASES",
        "ai_role": "Phrase Expansion Specialist",
        "model_call": true,
        "input": "intent description + WeaknessReport + ParsedUserExamples + constraints",
        "output": "BasePhraseSet: direct, conversational, formal, casual, paraphrase, indirect categories",
        "prompt_template_key": "GENERATE_BASE_PHRASES",
        "max_tokens": 3000,
        "note": "Largest call. May be split into category sub-calls if token budget exceeded."
      },
      {
        "stage": 6,
        "name": "GENERATE_ENTITY_PHRASES",
        "ai_role": "Entity-Aware Training Data Designer",
        "model_call": "conditional",
        "condition": "ParsedEntityDescriptions.entities.length > 0 OR mode == GENERATE_ENTITY_AWARE_PHRASES OR mode == EXPAND_MULTI_SLOT_PHRASES",
        "input": "ParsedEntityDescriptions + combination_matrix + BasePhraseSet",
        "output": "EntityPhraseSet: single-entity, multi-slot, entity-order variants, entity-optional",
        "prompt_template_key": "GENERATE_ENTITY_AWARE_PHRASES",
        "max_tokens": 2000
      },
      {
        "stage": 7,
        "name": "VALIDATE_AND_PROTECT",
        "ai_role": "QA Validator",
        "model_call": "partial",
        "input": "BasePhraseSet + EntityPhraseSet + nearby_competing_intents",
        "output": "ValidationReport (V-PH-01 to V-PH-12) + ConflictRiskReport",
        "prompt_template_key": "DETECT_BOUNDARY_DRIFT",
        "max_tokens": 600,
        "programmatic_part": "IntentBoundaryGuard (embedding cosine similarity) + DuplicateNoiseFilter",
        "note": "high_risk phrases are removed automatically; medium_risk phrases are flagged for human review."
      },
      {
        "stage": 8,
        "name": "FILTER_AND_CLEAN",
        "ai_role": "none — programmatic",
        "model_call": false,
        "input": "ValidationReport + ConflictRiskReport + all phrase sets",
        "output": "CleanPhraseSet (no duplicates, no high-risk, no noise)",
        "note": "DuplicateNoiseFilter removes exact dups, near-dups, generic phrases, out-of-scope phrases."
      },
      {
        "stage": 9,
        "name": "BUILD_PHRASE_PACK",
        "ai_role": "none — programmatic (model is used only for the conditional REFINE_PHRASE_PACK call)",
        "model_call": "conditional",
        "condition": "quality_score < 65 → trigger REFINE_PHRASE_PACK call",
        "input": "CleanPhraseSet + PhraseDiversityEngine balancing",
        "output": "FinalPhrasePack: categorized, balanced, quality-scored",
        "prompt_template_key": "REFINE_PHRASE_PACK",
        "max_tokens": 2000,
        "max_refinement_iterations": 2
      },
      {
        "stage": 10,
        "name": "GENERATE_QA",
        "ai_role": "QA Test Designer",
        "model_call": "conditional",
        "condition": "include_qa_pack == true",
        "input": "FinalPhrasePack + intent definition + nearby_competing_intents",
        "output": "QATestPack: positive_tests, hard_negatives, boundary_tests, entity_extraction_tests",
        "prompt_template_key": "GENERATE_PHRASE_TEST_PACK",
        "max_tokens": 2000
      }
    ]
  },

  "USER_PHRASE_PARSER_DESIGN": {
    "input_formats_supported": [
      "newline-separated plain text",
      "JSON array of strings",
      "comma-separated string",
      "numbered list (1. phrase\\n2. phrase)",
      "bullet list (- phrase\\n- phrase)",
      "inline within prompt_text (detected and extracted)"
    ],
    "inline_extraction_from_prompt": {
      "pattern": "Quoted phrases in prompt: 'phrase1', 'phrase2' → extracted as user examples",
      "pattern_2": "After phrases like 'for example:' or 'such as:' → extract following lines as examples",
      "pattern_3": "Numbered list within prompt text → extract as examples"
    },
    "structural_analysis_per_phrase": {
      "entity_scan": "Check each phrase against known entity names/synonyms from DfEntity records",
      "pattern_type_detection": {
        "direct": "starts with imperative verb or clear action (book, cancel, create, set up)",
        "conversational": "contains question words (how, can you, I want to) or conditional (if, when)",
        "slot_fill": "very short (1-4 words) — likely a slot-fill continuation reply",
        "multi_slot": "contains 2+ entity references in same phrase",
        "problem_report": "contains problem indicators (not working, issue, error, failed, wrong)"
      },
      "language_detection": "detect English vs Hindi vs Hinglish using unicode range + known Hindi words",
      "output_per_phrase": "{ text, pattern_type, entities_found, language, word_count }"
    },
    "inference_from_patterns": {
      "description": "After parsing, WeaknessAnalyzer infers what ADDITIONAL types are needed based on what patterns are missing. If user gave 5 direct phrases → system generates conversational variants. If user gave entity-rich phrases → system generates entity-light variants.",
      "gap_detection": "Compute type_distribution of user examples. Missing types become generation targets."
    }
  },

  "ENTITY_DESCRIPTION_PARSER_DESIGN": {
    "input_format": [
      "JSON array: [{ name, description, examples }]",
      "Inline from prompt: 'entity: business_name, description: the name of a registered business'",
      "Key-value pairs pasted as text"
    ],
    "parsing_steps": [
      "1. Extract entity name (normalize to lowercase_snake_case)",
      "2. Extract description text",
      "3. Infer entity semantic type: PERSON_NAME | BUSINESS_NAME | PHONE | URL | DATE | NUMBER | CATEGORY | METHOD | STATUS | CUSTOM",
      "4. Generate placeholder token: {business_name}, {phone_number}, etc.",
      "5. Infer is_required vs is_optional from description text (words like 'optional', 'may', 'if available')",
      "6. Extract example values if provided",
      "7. Build combination_matrix: all 1-, 2-, 3-, and 4-entity combinations (capped at 4 entities per combination to avoid combinatorial explosion)"
    ],
    "combination_matrix_example": {
      "entities": ["business_name", "business_category", "phone_number"],
      "combinations": [
        ["business_name"],
        ["business_category"],
        ["phone_number"],
        ["business_name", "business_category"],
        ["business_name", "phone_number"],
        ["business_category", "phone_number"],
        ["business_name", "business_category", "phone_number"]
      ]
    },
    "entity_phrase_templates": {
      "description": "For each combination, PhraseGenerationEngine uses these structural templates as generation seeds:",
      "templates": [
        "My {entity1} is [value] and my {entity2} is [value]",
        "{entity1} is [value], {entity2} is [value]",
        "[value] is my {entity1} and [value] is {entity2}",
        "I have {entity1} as [value]",
        "[value] — for {entity1}",
        "Here is my {entity1}: [value]"
      ]
    }
  },

  "EXISTING_INTENT_CONTEXT_FETCHER": {
    "file": "modules/phraseEnhancer/services/existingIntentFetcher.service.js",
    "what_it_loads": [
      "DfIntent record (all fields including trainingPhrases[], parameters[], contexts)",
      "DfAgent record (settings, intentThreshold)",
      "ALL DfIntent records for same agent (for boundary detection — only names + 5 sample phrases each)",
      "DfEntity records for same agent (for entity coverage check)",
      "DfAgentVersion.snapshot.intents (if published — includes phraseEmbeddings for cosine comparison)"
    ],
    "nearby_intent_detection": {
      "method": "If user provided nearby_competing_intents[] → use those. Otherwise auto-detect top 5 most similar by name similarity (Levenshtein on intent names) + shared keywords in existing phrases.",
      "phrase_sample": "Load only 10 sample phrases per nearby intent to limit token usage"
    },
    "caching": "Cache DfAgent + DfEntity per agentId for 60 seconds to avoid repeated DB reads across multiple concurrent jobs"
  },

  "PHRASE_GENERATION_ENGINE": {
    "design": "Multi-pass LLM generation strategy. Stage 5 is the core base phrase generation. Organized into sub-generation calls if desired_phrase_count > 60 to avoid single-call token exhaustion.",
    "sub_calls": [
      { "sub": "A", "target_category": "direct + conversational",     "max_tokens": 1200, "count_target": "30-40% of desired" },
      { "sub": "B", "target_category": "formal + casual + paraphrase","max_tokens": 1000, "count_target": "30-40% of desired" },
      { "sub": "C", "target_category": "short_reply + problem_report + indirect", "max_tokens": 800, "count_target": "20-30% of desired" }
    ],
    "generation_constraints_passed_to_model": [
      "Do NOT repeat the user's example phrases verbatim",
      "Do NOT generate phrases outside the intent's topic scope",
      "Avoid phrases that could match {{nearby_intents}}",
      "Include typo variants for approximately 10-15% of phrases",
      "Include Hinglish variants if language_mode includes hindi/hinglish",
      "Keep each phrase under 120 characters",
      "Vary sentence structure — do not repeat the same grammatical template more than 3 times"
    ],
    "inference_beyond_examples": {
      "description": "The model is explicitly instructed to INFER additional phrase patterns not shown by the user's examples. For example, if user gave 5 direct phrases, the model must generate conversational forms, question forms, typo forms, and continuation replies even if none were provided.",
      "prompt_instruction": "The user's example phrases are seeds, not constraints. Generate diverse phrase variations the user has NOT provided. Think about: how a frustrated user might phrase it, how a new user would phrase it, how a power user would phrase it, how a non-English user might phrase it."
    }
  },

  "ENTITY_AWARE_PHRASE_ENGINE": {
    "design": "Separate Stage 6 call. Only runs if entity descriptions are provided OR if mode is GENERATE_ENTITY_AWARE_PHRASES / EXPAND_MULTI_SLOT_PHRASES.",
    "entity_phrase_types": [
      { "type": "single_entity",   "example": "My phone number is {phone_number}", "count": "3 per entity" },
      { "type": "entity_at_start", "example": "{business_name} is looking to onboard", "count": "2 per entity" },
      { "type": "entity_at_end",   "example": "Looking to register with {business_name}", "count": "2 per entity" },
      { "type": "dual_entity",     "example": "{business_name} offers {service_type}", "count": "3 per pair" },
      { "type": "multi_slot",      "example": "We are {business_name} in {category} and our contact is {phone_number}", "count": "3 per triple" },
      { "type": "entity_optional", "example": "Wants to complete registration (entity may or may not be mentioned)", "count": "3 per entity" },
      { "type": "short_slot_fill", "example": "{phone_number} — just the value, for slot-filling context", "count": "2 per entity" },
      { "type": "natural_combined","example": "Business name is {business_name}, category is {category}, number is {phone_number}", "count": "2 per triple" }
    ],
    "entity_ordering_permutation": "For N entities, generate phrases using at least (N-1)!/2 orderings to avoid positional bias in NLU training.",
    "entity_value_annotation": "Generated phrases include slot position metadata: { text: 'My phone is {phone}', slots: [{entity: '@phone', position: 'end'}] }"
  },

  "INTENT_BOUNDARY_PROTECTION_ENGINE": {
    "file": "modules/phraseEnhancer/services/intentBoundaryGuard.service.js",
    "how_it_works": [
      "1. Load published DfAgentVersion snapshot (if available) — has phraseEmbeddings per intent",
      "2. For each candidate phrase: compute 512-dim USE embedding via embeddingEngine.generateEmbedding()",
      "3. Compute cosine similarity between phrase embedding and centroid embedding of each nearby intent",
      "4. Score: high_risk (>0.70), medium_risk (0.50-0.70), clean (<0.50)",
      "5. high_risk phrases are automatically removed from final set",
      "6. medium_risk phrases are flagged in preview with warning label",
      "7. Each flagged phrase gets: competing_intent name + similarity score + fix_suggestion"
    ],
    "fix_suggestion_generation": {
      "method": "Simple template: 'Consider rephrasing to be more specific about X instead of Y'",
      "no_model_call_needed": true
    },
    "strict_mode": "When strict_intent_boundary=true, medium_risk threshold drops to 0.40 (tighter protection)",
    "fallback_without_embeddings": "If embeddingEngine not ready: fallback to keyword overlap check. Flag phrases sharing >3 content words with nearby intent training phrases."
  },

  "PHRASE_QUALITY_VALIDATION_ENGINE": {
    "file": "modules/phraseEnhancer/services/phraseQualityValidator.service.js",
    "quality_dimensions": [
      { "id": "QD-01", "name": "Semantic Precision",    "weight": 20, "scoring": "1 - (out_of_scope_rate). Measured by boundary guard." },
      { "id": "QD-02", "name": "Phrase Diversity",      "weight": 15, "scoring": "1 - type_concentration_index. Measures balance across 10 categories." },
      { "id": "QD-03", "name": "Entity Coverage",       "weight": 15, "scoring": "entities_with_5_plus_phrases / total_entities. 0 if no entities defined." },
      { "id": "QD-04", "name": "Multi-Slot Coverage",   "weight": 10, "scoring": "multi_slot_phrases / target_count. Capped at 1.0." },
      { "id": "QD-05", "name": "Short Reply Coverage",  "weight": 5,  "scoring": "min(short_reply_phrases, 3) / 3. Max 3 needed." },
      { "id": "QD-06", "name": "Multilingual Quality",  "weight": 5,  "scoring": "Only scored if language_mode != english. Proportion of valid Hinglish phrases." },
      { "id": "QD-07", "name": "Overlap Risk",          "weight": 15, "scoring": "1 - (high_risk_rate + 0.5*medium_risk_rate)." },
      { "id": "QD-08", "name": "Duplicate Rate",        "weight": 10, "scoring": "1 - duplicate_cluster_rate." },
      { "id": "QD-09", "name": "Direct Coverage",       "weight": 5,  "scoring": "direct_phrases / total * 100 / 10. Target 10% direct." }
    ],
    "composite_formula": "weighted_sum(dimension_scores) where weights sum to 100",
    "grade_thresholds": {
      "A": "85-100 — excellent, auto-approve eligible",
      "B": "70-84 — good, approve with minor notes",
      "C": "55-69 — trigger auto-refinement pass",
      "D": "below 55 — require human review"
    }
  },

  "MERGE_REPLACE_STRATEGY_DESIGN": {
    "strategies": {
      "replace_all": {
        "description": "Remove all existing phrases. Use only the AI-generated set.",
        "warning": "Irreversible without version rollback. Confirmation required.",
        "use_when": "Existing phrases are completely wrong or intent is being redesigned from scratch."
      },
      "merge_new": {
        "description": "Keep all existing phrases. Add generated phrases that are not already present (de-duplicated before add).",
        "use_when": "Existing phrases are OK but need expansion."
      },
      "add_only": {
        "description": "Add only new phrases. Never remove existing ones even if they are weak.",
        "use_when": "User wants to expand without any risk to existing data."
      },
      "regenerate_weak": {
        "description": "Run DuplicateNoiseFilter on existing phrases. Remove detected duplicates/generic from existing. Add all generated phrases.",
        "use_when": "Existing phrases have quality issues (duplicates, too-generic) that need cleanup."
      },
      "preserve_locked": {
        "description": "Same as merge_new but phrases tagged in locked_phrases[] are never removed regardless of quality issues.",
        "use_when": "Some manually-crafted phrases are business-critical and must be preserved."
      }
    },
    "output_diff": "Every save action generates a diff: { added: string[], removed: string[], preserved: string[] } stored in PhraseEnhancerVersion.change_summary",
    "rollback": "PhraseEnhancerVersion history allows restoring any previous phrase set via rollback endpoint."
  },

  "VERSIONING_AND_PREVIEW_FLOW": {
    "version_model": "PhraseEnhancerVersion",
    "version_states": ["DRAFT", "REFINED", "VALIDATED", "HUMAN_APPROVED", "SAVED_TO_INTENT"],
    "preview_grouping": {
      "direct":         "Direct intent phrases",
      "conversational": "Conversational phrasing",
      "entity_rich":    "Entity-aware phrases (grouped by entity combination)",
      "multi_slot":     "Multi-slot phrases",
      "short_reply":    "Short replies (for slot-fill context)",
      "formal":         "Formal tone phrases",
      "casual":         "Casual / short-command phrases",
      "multilingual":   "Hinglish / Hindi phrases (if generated)",
      "paraphrase":     "Paraphrase variants",
      "problem_report": "Problem/complaint framing phrases",
      "removed":        "Removed (duplicates + out-of-scope) — shown collapsed by default",
      "conflict_risk":  "Conflict-risk phrases (medium risk) — shown with warning"
    },
    "preview_metadata_per_phrase": "{ text, category, entities_used: [], language, source: ai_generated|user_provided|existing, conflict_risk: none|medium|high }",
    "diff_api": "GET /phrase-enhancer/jobs/:jobId/diff?from=v1&to=v2 → { added[], removed[], preserved[] }"
  },

  "MODEL_CONNECTOR_DESIGN": {
    "reuses": "modules/agentArchitect/services/modelConnector.service.js — zero duplication",
    "env_vars": "Same AI_ARCHITECT_* env vars. No new configuration needed.",
    "phrase_specific_settings": {
      "temperature": 0.4,
      "note": "Slightly higher temperature than agent generation (0.3) to get more diverse phrase variations. Override per call."
    },
    "call_budget_per_job": {
      "stage_1":  "max_tokens: 400",
      "stage_4":  "max_tokens: 800",
      "stage_5":  "max_tokens: 3000 (may split into 3 sub-calls of 1000 each)",
      "stage_6":  "max_tokens: 2000",
      "stage_7":  "max_tokens: 600",
      "stage_9":  "max_tokens: 2000 (only if refinement needed)",
      "stage_10": "max_tokens: 2000 (only if include_qa_pack)",
      "total_max": "~11000 tokens per full job (typical: 6000-8000)"
    }
  },

  "QA_AND_TEST_GENERATION_FLOW": {
    "trigger": "include_qa_pack=true in job request OR mode == PHRASE_GAP_ANALYSIS",
    "qa_pack_contents": {
      "positive_tests": {
        "count": "15-20",
        "format": "{ text, expected_intent, expected_entities: {} }",
        "source": "Subset of generated phrases re-used as unit tests. Each must clearly match the target intent."
      },
      "hard_negatives": {
        "count": "10-15",
        "format": "{ text, should_not_match: 'intent_name', reason, tip: 'what makes it a good negative' }",
        "description": "Phrases that look SIMILAR to the target intent but should NOT match it. Tests boundary precision.",
        "example": "If intent is check_eligibility: hard negative is 'my onboarding is complete' (similar domain but different intent)"
      },
      "boundary_tests": {
        "count": "8-12",
        "format": "{ text, expected_intent, risk_level: high|medium, note }",
        "description": "Phrases at the semantic edge of the intent — tests where the NLU might get confused."
      },
      "entity_extraction_tests": {
        "count": "10-15",
        "format": "{ text, expected_intent, expected_entities: { '@entity_name': 'value' } }",
        "description": "Phrases designed specifically to verify entity slot extraction works correctly."
      },
      "confusion_examples": {
        "count": "5-8",
        "format": "{ text, false_positive_risk: 'intent_name', explanation }",
        "description": "Phrases that users commonly get wrong when testing — known failure patterns."
      }
    },
    "test_pack_file_format": "Compatible with wa-masterConversationSuite-part*.json schema for consistency"
  },

  "DATA_MODELS_AND_STORAGE_REQUIREMENTS": {
    "PhraseEnhancerJob": {
      "collection": "phrase_enhancer_jobs",
      "fields": {
        "_id": "ObjectId",
        "tenantId": "String",
        "userId": "String",
        "agentId": "ObjectId — target DfAgent",
        "intentId": "ObjectId — target DfIntent (null for new intent)",
        "mode": "enum: 10 task modes",
        "status": "enum: QUEUED|RUNNING|COMPLETED|FAILED|AWAITING_REVIEW",
        "current_stage": "Number 1-10",
        "stage_name": "String",
        "percent_complete": "Number 0-100",

        "prompt_text": "String",
        "example_phrases_from_user": "[String]",
        "entity_descriptions": "[{ name, description, examples }]",
        "desired_phrase_count": "Number default 50",
        "language_mode": "enum: english|hindi|hinglish|mixed",
        "tone_mode": "enum: formal|casual|mixed",
        "merge_strategy": "enum: 5 strategies",
        "strict_intent_boundary": "Boolean",
        "nearby_competing_intents": "[ObjectId]",
        "current_issues": "String",
        "locked_phrases": "[String]",
        "include_qa_pack": "Boolean",

        "parsed_request": "Object — Stage 1 output",
        "parsed_user_examples": "Object — Stage 2 output",
        "parsed_entity_descriptions": "Object — Stage 3 output",
        "weakness_analysis": "Object — Stage 4 output",
        "phrase_candidates": "Object — Stage 5 output",
        "entity_aware_phrases": "Object — Stage 6 output",
        "validation_report": "Object — Stage 7 output",
        "conflict_risk_report": "Object — Stage 7 output",
        "filtered_phrases": "Object — Stage 8 output",
        "final_phrase_pack": "Object — Stage 9 output",
        "qa_test_pack": "Object — Stage 10 output",
        "quality_score": "Object — composite score",

        "saved_intent_id": "ObjectId — set after SAVE",
        "improvement_history": "Array<{ iteration, score_before, score_after, changes }>",
        "error_log": "Array<{ stage, message, timestamp }>",
        "createdAt": "Date",
        "updatedAt": "Date"
      }
    },
    "PhraseEnhancerVersion": {
      "collection": "phrase_enhancer_versions",
      "fields": {
        "_id": "ObjectId",
        "jobId": "ObjectId",
        "tenantId": "String",
        "intentId": "ObjectId",
        "version_number": "Number",
        "state": "enum: DRAFT|REFINED|VALIDATED|HUMAN_APPROVED|SAVED_TO_INTENT",
        "phrase_pack": "Object — categorized FinalPhrasePack",
        "quality_score": "Object",
        "validation_report": "Object",
        "conflict_risk_report": "Object",
        "change_summary": "String",
        "diff": "{ added: [String], removed: [String], preserved: [String] }",
        "created_by_stage": "Number",
        "createdAt": "Date"
      }
    },
    "FinalPhrasePack_Schema": {
      "intent_id": "ObjectId",
      "intent_name": "String",
      "total_count": "Number",
      "categories": {
        "direct":         "[{ text, source, entities_used, conflict_risk }]",
        "conversational": "[{ text, source, entities_used }]",
        "entity_rich":    "[{ text, entity_combo, slot_positions }]",
        "multi_slot":     "[{ text, slots: [{ entity, position }] }]",
        "short_reply":    "[{ text, for_slot_context }]",
        "formal":         "[{ text }]",
        "casual":         "[{ text }]",
        "multilingual":   "[{ text, language }]",
        "paraphrase":     "[{ text }]",
        "problem_report": "[{ text }]"
      },
      "removed_phrases": "[{ text, reason, original_source }]",
      "conflict_risk_phrases": "[{ text, competing_intent, similarity, risk_level }]",
      "quality_score": { "total": "Number 0-100", "grade": "A|B|C|D", "dimensions": {} }
    },
    "QATestPack_Schema": {
      "intent_id": "ObjectId",
      "positive_tests": "[{ text, expected_intent, expected_entities }]",
      "hard_negatives": "[{ text, should_not_match, reason }]",
      "boundary_tests": "[{ text, expected_intent, risk_level, note }]",
      "entity_extraction_tests": "[{ text, expected_intent, expected_entities }]",
      "confusion_examples": "[{ text, false_positive_risk, explanation }]"
    }
  },

  "API_ENDPOINT_DESIGN": {
    "base_path": "/phrase-enhancer",
    "endpoints": [
      {
        "method": "POST",
        "path": "/jobs",
        "description": "Create and queue a phrase enhancement job",
        "body": "PhraseEnhancerJobRequest (see REQUEST_PAYLOAD_MODEL)",
        "response": "{ jobId, status: 'QUEUED', estimatedDurationMs, pollUrl }"
      },
      {
        "method": "GET",
        "path": "/jobs/:jobId/status",
        "description": "Poll job progress",
        "response": "{ jobId, status, current_stage, stage_name, percent_complete, quality_score?, error? }"
      },
      {
        "method": "GET",
        "path": "/jobs/:jobId/preview",
        "description": "Get full preview of generated phrase pack grouped by category",
        "response": "{ jobId, final_phrase_pack, quality_score, validation_report, conflict_risk_report, qa_test_pack? }"
      },
      {
        "method": "POST",
        "path": "/jobs/:jobId/approve",
        "description": "Save approved phrase pack to intent using merge strategy",
        "body": "{ section_overrides?: { excluded_categories: [] }, locked_phrases?: string[] }",
        "response": "{ savedIntentId, merge_result: { added, removed, preserved }, new_phrase_count, republish_status }"
      },
      {
        "method": "POST",
        "path": "/jobs/:jobId/regenerate-category",
        "description": "Regenerate a specific phrase category",
        "body": "{ category: 'direct|conversational|entity_rich|...', feedback?: string }",
        "response": "{ jobId, category, new_phrases: [], quality_delta }"
      },
      {
        "method": "GET",
        "path": "/jobs/:jobId/qa-pack",
        "description": "Get QA test pack for the generated phrases",
        "response": "QATestPack"
      },
      {
        "method": "GET",
        "path": "/jobs/:jobId/history",
        "description": "List all phrase versions for this job",
        "response": "Array<PhraseEnhancerVersion>"
      },
      {
        "method": "POST",
        "path": "/jobs/:jobId/rollback/:versionId",
        "description": "Rollback intent phrases to a previous version",
        "response": "{ status: 'ROLLED_BACK', activeVersionId, restored_phrase_count }"
      },
      {
        "method": "GET",
        "path": "/intents/:intentId/analysis",
        "description": "Analyze current phrase quality for an intent (no job required)",
        "response": "{ intentId, phrase_count, quality_score, weakness_report, missing_types, entity_coverage }"
      },
      {
        "method": "GET",
        "path": "/jobs",
        "description": "List all phrase enhancer jobs for tenant",
        "query": "{ agentId?, intentId?, status?, limit, offset }",
        "response": "{ jobs: [], total }"
      }
    ]
  },

  "BACKGROUND_JOB_ORCHESTRATION": {
    "queue": "Reuses modules/agentArchitect/services/jobQueue.service.js (EventEmitter-based, no Redis)",
    "job_name": "phrase-enhancer-pipeline",
    "concurrency": "Shared with agentArchitect queue concurrency (AI_ARCHITECT_QUEUE_CONCURRENCY)",
    "stage_timeout_ms": 45000,
    "recovery": "RUNNING jobs found at startup marked FAILED. User can retry.",
    "stage_persistence": "After each stage: job.current_stage + output field saved to DB immediately. Pipeline can resume from last completed stage if needed.",
    "progress_percent_map": {
      "1": 10, "2": 18, "3": 24, "4": 32, "5": 50, "6": 65, "7": 75, "8": 83, "9": 92, "10": 100
    }
  },

  "FAILSAFE_AND_ERROR_HANDLING": {
    "model_unavailable": "Stages 1, 4, 5, 6, 7, 9, 10 are model-dependent. If model unavailable: Stage 1 uses keyword fallback. Stages 4,7 use programmatic-only checks. Stages 5,6 return empty candidate set and set status=AWAITING_REVIEW with reason='AI model not configured'.",
    "model_returns_bad_json": "Retry 3 times with exponential backoff. On third failure: mark that stage's output as NEEDS_HUMAN_REVIEW, continue with empty candidate set for that stage.",
    "no_phrases_generated": "If Stage 5 produces zero candidates: skip Stage 6-8, mark job FAILED with diagnostic message explaining why (common causes: API key missing, model refused, invalid intent description).",
    "boundary_guard_without_embeddings": "If embeddingEngine not ready: skip cosine similarity check. Use keyword overlap fallback. Mark ConflictRiskReport as 'low_confidence' in preview.",
    "zero_nearby_intents": "If no nearby intents found (single-intent agent): skip boundary guard. All phrases pass.",
    "partial_save_failure": "If saving to DfIntent fails (DB error): store final_phrase_pack in PhraseEnhancerVersion. User can retry save without regenerating.",
    "re_publish_failure": "If publishService.publish() fails after save: phrase data is saved but index not updated. Return { saved: true, republish_status: 'FAILED', retry_url: '/df/agents/:id/publish' }."
  },

  "SECURITY_AND_ACCESS_CONTROL": {
    "api_key_usage": "Same AI_ARCHITECT_API_KEY — no new secrets needed.",
    "prompt_logging": "Phrase content hashed (SHA-256) in call logs. Raw phrases never stored in AgentArchitectCallLog.",
    "phrase_data_safety": "Generated phrases stored in PhraseEnhancerJob and PhraseEnhancerVersion — tenant-isolated. Phrases only accessible to requests with matching tenantId.",
    "intent_access": "Only intents belonging to the requesting tenantId can be targeted. Validated at DfIntent.findOne({ _id, tenantId }) level.",
    "save_authorization": "POST /approve requires that the requesting userId either owns the job OR has agent_architect_approver role.",
    "audit": "Every job creation, stage completion, approval, and save logged with userId + tenantId + timestamp."
  },

  "SAMPLE_INTERNAL_PROMPTS": {
    "UNDERSTAND_IMPROVEMENT_REQUEST": {
      "system": "You are an expert intent training data improvement classifier. Analyze the user's request and determine: what type of improvement they want, what intent they are targeting, and what raw materials they have provided. Always return valid JSON only.",
      "user_template": "User request: '{{prompt_text}}'\nTarget agent: {{agent_name}}\nTarget intent: {{intent_name}}\nCurrent phrase count: {{phrase_count}}\n\nReturn: { mode, detected_user_examples: [], detected_entity_hints: [], constraints: {strict_boundary, language_mode, tone_mode, desired_count}, current_issues, confidence }",
      "max_tokens": 400
    },
    "ANALYZE_EXISTING_PHRASES": {
      "system": "You are a dialogue training data quality auditor. Review the provided intent phrases and identify gaps, weaknesses, and improvement opportunities. Return a structured analysis. Valid JSON only.",
      "user_template": "Intent: {{intent_name}}\nDomain: {{domain}}\nExisting phrases (sample of up to 20):\n{{existing_phrases_sample}}\nDefined entities: {{entity_names}}\nNearby intents to protect against: {{nearby_intent_names}}\n\nIdentify: phrase_type_gaps, entity_coverage_gaps, over_represented_patterns, missing_phrase_types, ambiguous_phrases.",
      "max_tokens": 800
    },
    "GENERATE_BASE_PHRASES": {
      "system": "You are an expert conversational AI training data specialist. Generate high-quality training phrases for the given intent. RULES: (1) Never copy user examples verbatim — extend and vary them. (2) Generate diverse phrase types: direct, conversational, formal, casual, paraphrase, indirect. (3) Include typo variants for ~10-15% of phrases. (4) Vary sentence structure. (5) Max 120 chars per phrase. (6) Do NOT generate phrases that could match nearby intents. Return valid JSON only.",
      "user_template": "Intent: {{intent_name}}\nDescription: {{intent_description}}\nDomain: {{domain}}\nExisting user examples (seeds, not targets): {{user_examples}}\nWeakness analysis: {{weakness_analysis}}\nNearby intents to AVOID: {{nearby_intent_names}}\nLanguage mode: {{language_mode}}\nTone mode: {{tone_mode}}\nTarget count: {{target_count}} phrases total across all categories.\n\nReturn: { categories: { direct: [{text}], conversational: [{text}], formal: [{text}], casual: [{text}], paraphrase: [{text}], indirect: [{text}], problem_report: [{text}], short_reply: [{text}] } }",
      "max_tokens": 3000
    },
    "GENERATE_ENTITY_AWARE_PHRASES": {
      "system": "You are an entity-aware training data designer for dialogue systems. Generate training phrases that naturally include the described entities. Each phrase must read naturally — never awkwardly force entity mentions. Generate all combination types specified. Return valid JSON only.",
      "user_template": "Intent: {{intent_name}}\nEntities defined:\n{{entity_descriptions}}\nEntity combination matrix:\n{{combination_matrix}}\nExisting entity examples (from user): {{entity_user_examples}}\n\nFor each entity combination, generate phrases of types: single_entity, entity_at_start, entity_at_end, dual_entity, multi_slot, entity_optional, short_slot_fill, natural_combined.\n\nReturn: { entity_phrase_sets: [ { entity_combo: [], phrases: [{text, slot_positions: []}] } ] }",
      "max_tokens": 2000
    },
    "DETECT_BOUNDARY_DRIFT": {
      "system": "You are a semantic boundary validator for dialogue systems. Given a list of phrase candidates and nearby competing intents, identify any phrases that could be misrouted to a wrong intent. Return valid JSON only.",
      "user_template": "Target intent: {{intent_name}}\nPhrase candidates (first 40):\n{{phrase_candidates_sample}}\nNearby competing intents with sample phrases:\n{{nearby_intents_with_phrases}}\n\nFor each candidate phrase: determine if it could trigger a competing intent. Return: { high_risk: [{phrase, competing_intent, reason}], medium_risk: [{phrase, competing_intent, reason}], clean: [phrase_texts] }",
      "max_tokens": 600
    },
    "REFINE_PHRASE_PACK": {
      "system": "You are a phrase quality improvement specialist. The generated phrase set has quality issues. Fix ONLY the identified issues and return additional phrases or replacements. Never reduce the total count. Return valid JSON only.",
      "user_template": "Intent: {{intent_name}}\nQuality issues identified:\n{{validation_errors}}\nCurrent phrase count by category:\n{{category_counts}}\nMissing categories: {{missing_categories}}\nEntity gaps: {{entity_coverage_gaps}}\n\nReturn: { add_to_categories: { category_name: [{text}] }, replacement_suggestions: [{old_phrase, new_phrase, reason}] }",
      "max_tokens": 2000
    },
    "GENERATE_PHRASE_TEST_PACK": {
      "system": "You are a QA test designer for dialogue intent systems. Generate a comprehensive test pack for this intent's training phrases. Tests must be realistic, precise, and useful for regression testing. Return valid JSON only.",
      "user_template": "Intent: {{intent_name}}\nFinal approved phrases (sample of 20): {{final_phrases_sample}}\nEntities defined: {{entity_names}}\nNearby competing intents: {{nearby_intent_names}}\n\nGenerate: (1) 15 positive_tests — clearly in-scope phrases; (2) 10 hard_negatives — similar but should NOT match this intent; (3) 8 boundary_tests — edge-case phrases; (4) 10 entity_extraction_tests — verify slot filling.\n\nReturn: { positive_tests, hard_negatives, boundary_tests, entity_extraction_tests }",
      "max_tokens": 2000
    }
  },

  "SAMPLE_INPUT_OUTPUT_FLOWS": {
    "flow_1_complaint_only": {
      "input": "{ agentId: '...', intentId: 'check_eligibility', prompt_text: 'This intent has weak phrases, it is missing a lot of natural user phrasing', desired_phrase_count: 50 }",
      "stage_1": "mode=IMPROVE_EXISTING_INTENT_PHRASES, no user examples detected, no entity descriptions",
      "stage_4": "WeaknessAnalyzer: 12 existing phrases, 80% direct only, 0 conversational, 0 short-reply, 0 Hinglish, entity @onboarding_method appears in only 2 phrases",
      "stage_5": "Generated 42 base phrases: 8 direct, 12 conversational, 8 formal, 7 casual, 7 paraphrase",
      "stage_6": "Entity phrases for @onboarding_method: 8 more phrases",
      "stage_7": "Validation: 0 errors, 2 warnings (V-PH-12 short-reply below target). ConflictRiskReport: 2 medium_risk phrases flagged.",
      "stage_8": "Removed 3 near-duplicates, 2 high-risk phrases. Clean set: 47 phrases.",
      "stage_9": "Final pack: 50 phrases across 8 categories. Quality score: 78 (Grade B).",
      "result": "Phrase set improved from 12 to 50 phrases. Quality improved from D to B."
    },
    "flow_2_examples_plus_entities": {
      "input": "{ agentId: '...', intentId: 'provide_business_info', prompt_text: 'Here are 5 example phrases. Entity descriptions attached.', example_phrases_from_user: ['We are ABC and our phone is 9999999999', 'business name is xyz'], entity_descriptions: [{name:'business_name', description:'registered business name'}, {name:'phone_number', description:'10-digit contact number'}, {name:'business_category', description:'industry category'}], desired_phrase_count: 60 }",
      "stage_2": "ParsedUserExamples: 2 phrases, pattern: multi_slot + direct. Entities found: business_name + phone_number in phrase 1.",
      "stage_3": "ParsedEntityDescriptions: 3 entities. combination_matrix: 7 combos. Placeholders: {business_name}, {phone_number}, {business_category}.",
      "stage_5": "Generated 35 base phrases across categories.",
      "stage_6": "Entity phrases: 25 entity-aware phrases across 7 entity combinations. Includes: 7 single-entity, 9 dual-entity, 9 triple-entity.",
      "stage_9": "Final pack: 60 phrases. Quality score: 84 (Grade B). Entity coverage: all 3 entities in 5+ phrases each.",
      "result": "Production-ready 60-phrase set with full entity-combination coverage."
    },
    "flow_3_tighten_boundary": {
      "input": "{ agentId: '...', intentId: 'campaign_analytics', prompt_text: 'Do not make this intent broad. Keep it only for campaign analytics, not campaign creation or sending.', strict_intent_boundary: true, nearby_competing_intents: ['campaign_creation', 'send_campaign'] }",
      "stage_1": "mode=TIGHTEN_INTENT_BOUNDARY. strict_intent_boundary=true.",
      "stage_4": "WeaknessAnalyzer: 8 existing phrases drift toward campaign_creation. High risk phrases identified.",
      "stage_7": "IntentBoundaryGuard: 3 high_risk, 5 medium_risk from existing + candidates. strict_mode: threshold 0.40.",
      "stage_8": "Removed 3 high_risk + 3 medium_risk (strict mode). Net clean: 22 phrases from 28.",
      "stage_9": "Refined 6 medium_risk phrases with AI rewrites to tighten scope.",
      "result": "Intent phrase set protected from boundary drift. 2 phrases remain flagged for human review."
    }
  },

  "IMPLEMENTATION_PHASE_PLAN": {
    "phase_1": {
      "name": "Core Infrastructure + Stage 1-2",
      "deliverables": [
        "PhraseEnhancerJob + PhraseEnhancerVersion models",
        "phraseInterpreter.service.js (Stage 1 — LLM + keyword fallback)",
        "existingIntentFetcher.service.js (load DfIntent + agent context)",
        "userPhraseParser.service.js (programmatic — Stage 2)",
        "entityDescriptionParser.service.js (programmatic — Stage 3)",
        "Basic API: POST /jobs, GET /jobs/:id/status, GET /jobs/:id/preview",
        "phraseOrchestrator.service.js (stages 1-3 only)"
      ]
    },
    "phase_2": {
      "name": "Phrase Generation Pipeline (Stages 4-6)",
      "deliverables": [
        "phraseGenerationEngine.service.js (Stage 5 — base phrase generation)",
        "entityAwarePhraseEngine.service.js (Stage 6 — entity-aware phrases)",
        "WeaknessAnalyzer (Stage 4 — programmatic + LLM)",
        "POST /jobs/:id/approve, GET /jobs/:id/history"
      ]
    },
    "phase_3": {
      "name": "Validation + Boundary Protection (Stages 7-9)",
      "deliverables": [
        "intentBoundaryGuard.service.js (embedding cosine similarity)",
        "phraseQualityValidator.service.js (all 12 V-PH-* checks)",
        "duplicateNoiseFilter.service.js",
        "phraseDiversityEngine.service.js (final pack assembly)",
        "phraseMergeStrategy.service.js",
        "Full approval + save to DfIntent + re-publish flow"
      ]
    },
    "phase_4": {
      "name": "QA Pack + Refinement Loop + Versioning",
      "deliverables": [
        "qaTestPhraseGenerator.service.js (Stage 10)",
        "phraseVersioning.service.js (rollback support)",
        "Auto-refinement loop (score < 65 trigger)",
        "POST /jobs/:id/regenerate-category",
        "POST /jobs/:id/rollback/:versionId",
        "GET /intents/:intentId/analysis"
      ]
    }
  }
}