From 690b1e15fe1056e60f9bb1fa7849c1617b668ea8 Mon Sep 17 00:00:00 2001
From: Artem Vovk <53193260+artemvovk@users.noreply.github.com>
Date: Mon, 16 Dec 2024 20:59:33 -0700
Subject: [PATCH] fix: use cut-over-lock-timeout for instant DDL (#1468)

* fix: use cut-over-lock-timeout for instant DDL

Addresses https://github.com/github/gh-ost/issues/1386 by reusing the cut-over-lock-timeout from the cutover code. The lock wait timeout in the original code is actually set to double the setting, so we keep that consistent.

* add new usage to the arg description

* Rename variable `query` to `lockTimeoutQuery`

---------

Co-authored-by: meiji163
---
 go/cmd/gh-ost/main.go | 2 +-
 go/logic/applier.go   | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/go/cmd/gh-ost/main.go b/go/cmd/gh-ost/main.go
index 3e6057995..0829429e0 100644
--- a/go/cmd/gh-ost/main.go
+++ b/go/cmd/gh-ost/main.go
@@ -107,7 +107,7 @@ func main() {
 	chunkSize := flag.Int64("chunk-size", 1000, "amount of rows to handle in each iteration (allowed range: 10-100,000)")
 	dmlBatchSize := flag.Int64("dml-batch-size", 10, "batch size for DML events to apply in a single transaction (range 1-100)")
 	defaultRetries := flag.Int64("default-retries", 60, "Default number of retries for various operations before panicking")
-	cutOverLockTimeoutSeconds := flag.Int64("cut-over-lock-timeout-seconds", 3, "Max number of seconds to hold locks on tables while attempting to cut-over (retry attempted when lock exceeds timeout)")
+	cutOverLockTimeoutSeconds := flag.Int64("cut-over-lock-timeout-seconds", 3, "Max number of seconds to hold locks on tables while attempting to cut-over (retry attempted when lock exceeds timeout) or attempting instant DDL")
 	niceRatio := flag.Float64("nice-ratio", 0, "force being 'nice', imply sleep time per chunk time; range: [0.0..100.0]. Example values: 0 is aggressive. 1: for every 1ms spent copying rows, sleep additional 1ms (effectively doubling runtime); 0.7: for every 10ms spend in a rowcopy chunk, spend 7ms sleeping immediately after")
 
 	maxLagMillis := flag.Int64("max-lag-millis", 1500, "replication lag at which to throttle operation")
diff --git a/go/logic/applier.go b/go/logic/applier.go
index 1be696909..59562dc7f 100644
--- a/go/logic/applier.go
+++ b/go/logic/applier.go
@@ -255,6 +255,15 @@ func (this *Applier) ValidateOrDropExistingTables() error {
 func (this *Applier) AttemptInstantDDL() error {
 	query := this.generateInstantDDLQuery()
 	this.migrationContext.Log.Infof("INSTANT DDL query is: %s", query)
+
+	// Reuse cut-over-lock-timeout from regular migration process to reduce risk
+	// in situations where there may be long-running transactions.
+	tableLockTimeoutSeconds := this.migrationContext.CutOverLockTimeoutSeconds * 2
+	this.migrationContext.Log.Infof("Setting LOCK timeout as %d seconds", tableLockTimeoutSeconds)
+	lockTimeoutQuery := fmt.Sprintf(`set /* gh-ost */ session lock_wait_timeout:=%d`, tableLockTimeoutSeconds)
+	if _, err := this.db.Exec(lockTimeoutQuery); err != nil {
+		return err
+	}
 	// We don't need a trx, because for instant DDL the SQL mode doesn't matter.
 	_, err := this.db.Exec(query)
 	return err