Skip to content

Add DRY and fix the server to use other new samplers. #504

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 25 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
4920f2f
Skeleton
Jun 6, 2025
1a275ed
Non working
Jun 7, 2025
ce0b5f0
Well this is something.. it compiles
Jun 7, 2025
e8a1152
Debug and fixes
Jun 7, 2025
6d5c0a2
Cordon off dry debug
Jun 7, 2025
bac93da
Add new samplers to server, xtc or sigma did nothing before either
Jun 7, 2025
e05d780
Merge branch 'main' of https://github.com/ikawrakow/ik_llama.cpp
Jun 7, 2025
4edd4bc
Comments and attributions (approx)
Ph0rk0z Jun 8, 2025
cad53af
Merge branch 'main' into main
Ph0rk0z Jun 8, 2025
ed1388b
Doubled float_max.
Ph0rk0z Jun 8, 2025
401b88e
Remove LLM jank.
Ph0rk0z Jun 8, 2025
b5d7da3
Merge branch 'main' of https://github.com/Ph0rk0z/ik_llama.cpp
Jun 8, 2025
32dab5e
Take out ring buffer
Ph0rk0z Jun 8, 2025
758a92b
Merge branch 'main' of https://github.com/Ph0rk0z/ik_llama.cpp
Jun 8, 2025
9e70498
the impl function only does Z-algorithm, while the main function han…
Jun 8, 2025
ad4c815
Merge branch 'main' of https://github.com/ikawrakow/ik_llama.cpp
Jun 9, 2025
98f1eb2
Merge branch 'main' of https://github.com/ikawrakow/ik_llama.cpp
Jun 10, 2025
89cf632
Typos
Jun 10, 2025
b47fddc
Fix buffer ambiguity
Jun 10, 2025
d3da7a7
Merge branch 'main' of https://github.com/ikawrakow/ik_llama.cpp
Jun 11, 2025
c25be41
Merge branch 'main' of https://github.com/ikawrakow/ik_llama.cpp
Jun 11, 2025
6741c08
Merge branch 'main' into main
Ph0rk0z Jun 12, 2025
8e737ba
Merge branch 'main' of https://github.com/ikawrakow/ik_llama.cpp
Jun 13, 2025
486d4f3
Double declaration
Jun 13, 2025
ff76ae0
Merge branch 'main' of https://github.com/Ph0rk0z/ik_llama.cpp
Jun 13, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 57 additions & 4 deletions common/common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,46 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
sparams.top_n_sigma = std::stof(argv[i]);
return true;
}
// --- DRY sampler CLI flags (defaults live in common/sampling.h) ---
if (arg == "--dry-multiplier") {
CHECK_ARG
// 0.0 disables the DRY sampler entirely.
sparams.dry_multiplier = std::stof(argv[i]);
return true;
}
if (arg == "--dry-base") {
CHECK_ARG
float potential_base = std::stof(argv[i]);
// Bases below 1.0 would make the penalty shrink as repetitions grow, so
// such values are silently ignored and the previous/default base is kept.
if (potential_base >= 1.0f) {
sparams.dry_base = potential_base;
}
return true;
}
if (arg == "--dry-allowed-length") {
CHECK_ARG
sparams.dry_allowed_length = std::stoi(argv[i]);
return true;
}
if (arg == "--dry-penalty-last-n") {
CHECK_ARG
// 0 = disable the penalty window, -1 = scan the whole context.
sparams.dry_penalty_last_n = std::stoi(argv[i]);
return true;
}
if (arg == "--dry-sequence-breaker") {
CHECK_ARG
// The flag may be passed multiple times, once per breaker string. The
// first occurrence replaces the built-in defaults; the function-local
// static ensures that clearing happens only once per process, which is
// fine for one-shot command-line parsing.
static bool defaults_cleared = false;

if (!defaults_cleared) {
sparams.dry_sequence_breakers.clear();
defaults_cleared = true;
}

// The literal value "none" removes all breakers, including any supplied
// by earlier occurrences of this flag.
if (std::string(argv[i]) == "none") {
sparams.dry_sequence_breakers.clear();
} else {
sparams.dry_sequence_breakers.emplace_back(argv[i]);
}
return true;
// TODO: validate input (e.g. reject empty breaker strings)
}
if (arg == "--cfg-negative-prompt") {
CHECK_ARG
sparams.cfg_negative_prompt = argv[i];
Expand Down Expand Up @@ -1668,12 +1708,21 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
// Sampler-related usage entries: (group, flag, help format string, printf-style args).
// Each help string's format specifiers must line up with the trailing arguments of
// the SAME entry — the DRY entries previously had their arguments shifted by one.
options.push_back({ "*", " --mirostat-lr N", "Mirostat learning rate, parameter eta (default: %.1f)", (double)sparams.mirostat_eta });
options.push_back({ "*", " --mirostat-ent N", "Mirostat target entropy, parameter tau (default: %.1f)", (double)sparams.mirostat_tau });
options.push_back({ "*", " --xtc-probability p", "xtc probability (default: %.1f, 0.0 = disabled)", (double)sparams.xtc_probability });
options.push_back({ "*", " --xtc-threshold t", "xtc threshold (default: %.1f, >0.5 = disabled)", (double)sparams.xtc_threshold });
options.push_back({ "*", " --top-n-sigma t", "top-n-sigma parameter (default: %.1f, 0.0 = disabled)", (double)sparams.top_n_sigma });
options.push_back({ "*", " -l TOKEN_ID(+/-)BIAS", "modifies the likelihood of token appearing in the completion,\n"
"i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',\n"
"or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'" });
options.push_back({ "*", " --dry-multiplier t", "DRY repetition penalty multiplier (default: %.1f, 0.0 = disabled)", (double)sparams.dry_multiplier });
options.push_back({ "*", " --dry-base t", "DRY base (default: %.2f, must be >= 1.0)", (double)sparams.dry_base });
options.push_back({ "*", " --dry-allowed-length N", "DRY allowed repetition length (default: %d)", sparams.dry_allowed_length });
options.push_back({ "*", " --dry-penalty-last-n N", "DRY penalty range (default: %d, 0 = disable, -1 = context size)", sparams.dry_penalty_last_n });

options.push_back({ "main", " --cfg-negative-prompt PROMPT",
"negative prompt to use for guidance (default: '%s')", sparams.cfg_negative_prompt.c_str() });
options.push_back({ "main", " --cfg-negative-prompt-file FNAME",
"negative prompt file to use for guidance" });
Expand Down Expand Up @@ -3434,6 +3483,10 @@ void yaml_dump_non_result_info(FILE * stream, const gpt_params & params, const l
fprintf(stream, "xtc_probability: %f # default: 0.0\n", sparams.xtc_probability);
fprintf(stream, "xtc_threshold: %f # default: 0.0\n", sparams.xtc_threshold);
fprintf(stream, "top_n_sigma: %f # default: 0.0\n", sparams.top_n_sigma);
// DRY sampler settings mirrored into the YAML dump (-1 penalty range = context size).
fprintf(stream, "dry_allowed_length: %d # default: 2\n", sparams.dry_allowed_length);
fprintf(stream, "dry_base: %.2f # default: 1.75\n", sparams.dry_base);
fprintf(stream, "dry_multiplier: %.1f # default: 0.0\n", sparams.dry_multiplier);
fprintf(stream, "dry_penalty_last_n: %d # default: -1 (0 = disable, -1 = context size)\n", sparams.dry_penalty_last_n);
fprintf(stream, "mlock: %s # default: false\n", params.use_mlock ? "true" : "false");
fprintf(stream, "model: %s # default: %s\n", params.model.c_str(), DEFAULT_MODEL_PATH);
fprintf(stream, "model_draft: %s # default:\n", params.model_draft.c_str());
Expand Down
57 changes: 37 additions & 20 deletions common/sampling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,15 +118,17 @@ std::string llama_sampling_prev_str(llama_sampling_context * ctx_sampling, llama
std::string llama_sampling_print(const llama_sampling_params & params) {
char result[1024];

snprintf(result, sizeof(result),
"\trepeat_last_n = %d, repeat_penalty = %.3f, frequency_penalty = %.3f, presence_penalty = %.3f\n"
"\ttop_k = %d, tfs_z = %.3f, top_p = %.3f, min_p = %.3f, typical_p = %.3f, temp = %.3f\n"
"\tmirostat = %d, mirostat_lr = %.3f, mirostat_ent = %.3f\n"
"\txtc_probability = %.3f, xtc_threshold = %.3f, top_n_sigma = %.3f",
params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present,
params.top_k, params.tfs_z, params.top_p, params.min_p, params.typical_p, params.temp,
params.mirostat, params.mirostat_eta, params.mirostat_tau,
params.xtc_probability, params.xtc_threshold, params.top_n_sigma);
snprintf(result, sizeof(result),
"\trepeat_last_n = %d, repeat_penalty = %.3f, frequency_penalty = %.3f, presence_penalty = %.3f\n"
"\ttop_k = %d, tfs_z = %.3f, top_p = %.3f, min_p = %.3f, typical_p = %.3f, temp = %.3f\n"
"\tmirostat = %d, mirostat_lr = %.3f, mirostat_ent = %.3f\n"
"\txtc_probability = %.3f, xtc_threshold = %.3f, top_n_sigma = %.3f\n"
"\tdry_multiplier = %.3f, dry_base = %.3f, dry_allowed_length = %d, dry_penalty_last_n = %d",
params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present,
params.top_k, params.tfs_z, params.top_p, params.min_p, params.typical_p, params.temp,
params.mirostat, params.mirostat_eta, params.mirostat_tau,
params.xtc_probability, params.xtc_threshold, params.top_n_sigma,
params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n);

return std::string(result);
}
Expand Down Expand Up @@ -157,6 +159,7 @@ std::string llama_sampling_type_to_str(llama_sampler_type sampler_type) {
case llama_sampler_type::TEMPERATURE: return "temperature";
case llama_sampler_type::XTC : return "xtc";
case llama_sampler_type::TOP_N_SIGMA: return "top_n_sigma";
case llama_sampler_type::DRY : return "dry";
default : return "";
}
}
Expand All @@ -170,6 +173,7 @@ std::vector<llama_sampler_type> llama_sampling_types_from_names(const std::vecto
{"tfs_z", llama_sampler_type::TFS_Z},
{"xtc", llama_sampler_type::XTC},
{"top_n_sigma", llama_sampler_type::TOP_N_SIGMA},
{"dry", llama_sampler_type::DRY},
{"temperature", llama_sampler_type::TEMPERATURE}
};

Expand All @@ -186,6 +190,7 @@ std::vector<llama_sampler_type> llama_sampling_types_from_names(const std::vecto
{"tfs", llama_sampler_type::TFS_Z},
{"xtc", llama_sampler_type::XTC},
{"top-n-sigma", llama_sampler_type::TOP_N_SIGMA},
{"dry", llama_sampler_type::DRY},
{"temp", llama_sampler_type::TEMPERATURE}
};

Expand Down Expand Up @@ -222,6 +227,7 @@ std::vector<llama_sampler_type> llama_sampling_types_from_chars(const std::strin
{'f', llama_sampler_type::TFS_Z},
{'x', llama_sampler_type::XTC},
{'n', llama_sampler_type::TOP_N_SIGMA},
{'d', llama_sampler_type::DRY},
{'t', llama_sampler_type::TEMPERATURE}
};

Expand All @@ -242,17 +248,22 @@ static void sampler_queue(
const llama_sampling_params & params,
llama_token_data_array & cur_p,
size_t min_keep) {
const float temp = params.temp;
const float dynatemp_range = params.dynatemp_range;
const float dynatemp_exponent = params.dynatemp_exponent;
const int32_t top_k = params.top_k;
const float top_p = params.top_p;
const float min_p = params.min_p;
const float tfs_z = params.tfs_z;
const float typical_p = params.typical_p;
const float xtc_probability = params.xtc_probability;
const float xtc_threshold = params.xtc_threshold;
const float top_n_sigma = params.top_n_sigma;
const float temp = params.temp;
const float dynatemp_range = params.dynatemp_range;
const float dynatemp_exponent = params.dynatemp_exponent;
const int32_t top_k = params.top_k;
const float top_p = params.top_p;
const float min_p = params.min_p;
const float tfs_z = params.tfs_z;
const float typical_p = params.typical_p;
const float xtc_probability = params.xtc_probability;
const float xtc_threshold = params.xtc_threshold;
const float top_n_sigma = params.top_n_sigma;
const float dry_multiplier = params.dry_multiplier;
const float dry_base = params.dry_base;
const int32_t dry_allowed_length = params.dry_allowed_length;
const int32_t dry_penalty_last_n = params.dry_penalty_last_n;

const std::vector<llama_sampler_type> & samplers_sequence = params.samplers_sequence;

for (auto sampler_type : samplers_sequence) {
Expand All @@ -263,6 +274,10 @@ static void sampler_queue(
case llama_sampler_type::TOP_P : llama_sample_top_p (ctx_main, &cur_p, top_p, min_keep); break;
case llama_sampler_type::MIN_P : llama_sample_min_p (ctx_main, &cur_p, min_p, min_keep); break;
case llama_sampler_type::XTC : llama_sample_xtc (ctx_main, &cur_p, xtc_probability, xtc_threshold, min_keep); break;
case llama_sampler_type::DRY : llama_sample_dry (ctx_main, &cur_p, dry_multiplier, dry_base,
dry_allowed_length, dry_penalty_last_n,
params.dry_sequence_breakers);
break;
case llama_sampler_type::TOP_N_SIGMA: llama_sample_top_n_sigma(ctx_main, &cur_p, top_n_sigma); break;
case llama_sampler_type::TEMPERATURE:
if (dynatemp_range > 0) {
Expand Down Expand Up @@ -469,6 +484,8 @@ void llama_sampling_accept(
ctx_sampling->prev.erase(ctx_sampling->prev.begin());
ctx_sampling->prev.push_back(id);

llama_sample_dry_accept_token(ctx_main, id);

if (ctx_sampling->grammar != NULL && apply_grammar) {
llama_grammar_accept_token(ctx_sampling->grammar, ctx_main, id);
}
Expand Down
8 changes: 7 additions & 1 deletion common/sampling.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@

// sampler types
// Sampler identifiers; the underlying char doubles as the one-letter CLI shorthand
// accepted by llama_sampling_types_from_chars. Fix: the DRY enumerator was declared
// twice (once at the top and once after TOP_N_SIGMA), which is a compile error.
enum class llama_sampler_type : char {
    DRY         = 'd',
    TOP_K       = 'k',
    TOP_P       = 'p',
    MIN_P       = 'm',
    TFS_Z       = 'f',
    XTC         = 'x',
    TOP_N_SIGMA = 'n',
    TYPICAL_P   = 'y',
    TEMPERATURE = 't'
};
Expand Down Expand Up @@ -45,6 +45,10 @@ typedef struct llama_sampling_params {
float xtc_probability = 0.0f; // xtc probability
float xtc_threshold = 1.0f; // xtc threshold, disabled if > 0.5
float top_n_sigma = 0.0f; // top-n-sigma
float dry_multiplier = 0.0f; // 0.0 = disabled; DRY repetition penalty for tokens extending repetition:
float dry_base = 1.75f; // 0.0 = disabled; multiplier * base ^ (length of sequence before token - allowed length)
int32_t dry_allowed_length = 2; // tokens extending repetitions beyond this receive penalty
int32_t dry_penalty_last_n = -1; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size)
bool penalize_nl = false; // consider newlines as a repeatable token
uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampling_context

Expand All @@ -58,6 +62,8 @@ typedef struct llama_sampling_params {
llama_sampler_type::TEMPERATURE
};

std::vector<std::string> dry_sequence_breakers = {"\n", ":", "\"", "*"}; // default sequence breakers for DRY

std::string grammar; // optional BNF-like grammar to constrain sampling

// Classifier-Free Guidance
Expand Down
Loading