diff --git a/README.md b/README.md index a679b86..e69de29 100644 --- a/README.md +++ b/README.md @@ -1,2 +0,0 @@ -# preference-arbiter-2 - diff --git a/config.json b/config.json new file mode 100644 index 0000000..0e72b46 --- /dev/null +++ b/config.json @@ -0,0 +1,52 @@ +{ + "model": null, + "signature": { + "description": "Evaluate and compare the quality of two responses (Response A and Response B) given a specific question.\nDetermine which response better addresses the question by focusing on factual correctness, completeness,\nand adherence to any specific requirements mentioned in the question prompt.\n\nBefore yielding your decision, think step by step and explain your reasoning in the reasoning field.\nBe sure to verbally express your uncertainty in your thought process.\n\nDetailed Instructions:\n\n1. **Understand the Question Context:**\n - Ensure you comprehend the full context and requirements specified by the question or problem statement.\n - Note any domain-specific terminologies or conditions.\n\n2. **Evaluate Each Response:**\n - Check for factual accuracy in the content, calculations, or recommendations provided.\n - Assess the response for completeness\u2014whether it completely addresses all aspects of the question.\n - Verify adherence to the specified question requirements.\n - Consider clarity and structure of the explanation or solution provided.\n\n3. **Decision Making:**\n - Determine which response (A or B) best meets the above criteria.\n - Select the response that is not only correct but also most aligns with the question's specific requirements.\n\n4. **Output Your Conclusion:**\n - Document your reasoning process in the reasoning field.\n - Output \"A>B\" if Response A is better, or \"B>A\" if Response B is better.", + "properties": { + "question": { + "__dspy_field_type": "input", + "desc": "The original question or prompt", + "prefix": "Question:", + "title": "Question", + "type": "string" + }, + "response_A": { + "__dspy_field_type": "input", + "desc": "First response to evaluate", + "prefix": "Response A:", + "title": "Response A", + "type": "string" + }, + "response_B": { + "__dspy_field_type": "input", + "desc": "Second response to evaluate", + "prefix": "Response B:", + "title": "Response B", + "type": "string" + }, + "reasoning": { + "__dspy_field_type": "output", + "desc": "Your step by step reasoning for why you chose the better response. With verbally expressed uncertainty.", + "prefix": "Reasoning:", + "title": "Reasoning", + "type": "string" + }, + "label": { + "__dspy_field_type": "output", + "desc": "Which response is better: 'A>B' or 'B>A'", + "prefix": "Label:", + "title": "Label", + "type": "string" + } + }, + "required": [ + "question", + "response_A", + "response_B", + "reasoning", + "label" + ], + "title": "PreferenceSig", + "type": "object" + } +} \ No newline at end of file diff --git a/probe.json b/probe.json new file mode 100644 index 0000000..1a3019b --- /dev/null +++ b/probe.json @@ -0,0 +1 @@ +{"probe_version":"v1","embedding_dim":5120,"dropout":0.0,"layer_index":16,"num_layers":65,"probe_type":"linear"} \ No newline at end of file diff --git a/probe.safetensors b/probe.safetensors new file mode 100644 index 0000000..fa1d649 Binary files /dev/null and b/probe.safetensors differ diff --git a/program.json b/program.json new file mode 100644 index 0000000..c76d76c --- /dev/null +++ b/program.json @@ -0,0 +1,48 @@ +{ + "traces": [], + "train": [], + "demos": [], + "signature": { + "instructions": "Evaluate and compare the quality of two responses (Response A and Response B) given a specific question.\nDetermine which response better addresses the question by focusing on factual correctness, completeness,\nand adherence to any specific requirements mentioned in the question prompt.\n\nBefore yielding your decision, think step by step and explain your reasoning in the reasoning field.\nBe sure to verbally express your uncertainty in your thought process.\n\nDetailed Instructions:\n\n1. **Understand the Question Context:**\n - Ensure you comprehend the full context and requirements specified by the question or problem statement.\n - Note any domain-specific terminologies or conditions.\n\n2. **Evaluate Each Response:**\n - Check for factual accuracy in the content, calculations, or recommendations provided.\n - Assess the response for completeness\u2014whether it completely addresses all aspects of the question.\n - Verify adherence to the specified question requirements.\n - Consider clarity and structure of the explanation or solution provided.\n\n3. **Decision Making:**\n - Determine which response (A or B) best meets the above criteria.\n - Select the response that is not only correct but also most aligns with the question's specific requirements.\n\n4. **Output Your Conclusion:**\n - Document your reasoning process in the reasoning field.\n - Output \"A>B\" if Response A is better, or \"B>A\" if Response B is better.", + "fields": [ + { + "prefix": "Question:", + "description": "The original question or prompt" + }, + { + "prefix": "Response A:", + "description": "First response to evaluate" + }, + { + "prefix": "Response B:", + "description": "Second response to evaluate" + }, + { + "prefix": "Reasoning:", + "description": "Your step by step reasoning for why you chose the better response. With verbally expressed uncertainty." + }, + { + "prefix": "Label:", + "description": "Which response is better: 'A>B' or 'B>A'" + } + ] + }, + "lm": { + "model": "together_ai/Qwen/Qwen3-VL-32B-Instruct", + "model_type": "chat", + "cache": true, + "num_retries": 3, + "finetuning_model": null, + "launch_kwargs": {}, + "train_kwargs": {}, + "temperature": null, + "max_tokens": null + }, + "metadata": { + "dependency_versions": { + "python": "3.11", + "dspy": "3.1.2", + "cloudpickle": "3.1" + } + } +} \ No newline at end of file