Skip to content

Commit

Permalink
🍒 #8191 - TempLocationManager Fixes and Improvements (#8199)
Browse files Browse the repository at this point in the history
* Remove CompletableFuture from TempLocationManager

(cherry picked from commit d0eed45)

* Do not run the cleanup if there is nothing to clean up

(cherry picked from commit 58d5da8)

* Exclude JFR repository from temp files self-cleanup

(cherry picked from commit a770911)
  • Loading branch information
jbachorik authored Jan 14, 2025
1 parent d24ba9e commit ec1744f
Show file tree
Hide file tree
Showing 2 changed files with 121 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import datadog.trace.api.config.ProfilingConfig;
import datadog.trace.bootstrap.config.provider.ConfigProvider;
import datadog.trace.util.AgentTaskScheduler;
import datadog.trace.util.PidHelper;
import java.io.IOException;
import java.nio.file.FileVisitResult;
Expand All @@ -15,10 +16,10 @@
import java.time.Instant;
import java.time.temporal.ChronoUnit;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

Expand All @@ -30,6 +31,9 @@
*/
public final class TempLocationManager {
private static final Logger log = LoggerFactory.getLogger(TempLocationManager.class);
private static final Pattern JFR_DIR_PATTERN =
Pattern.compile("\\d{4}_\\d{2}_\\d{2}_\\d{2}_\\d{2}_\\d{2}_\\d{6}");
private static final String TEMPDIR_PREFIX = "pid_";

private static final class SingletonHolder {
private static final TempLocationManager INSTANCE = new TempLocationManager();
Expand Down Expand Up @@ -62,7 +66,7 @@ default FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOE
default void onCleanupStart(boolean selfCleanup, long timeout, TimeUnit unit) {}
}

private class CleanupVisitor implements FileVisitor<Path> {
private final class CleanupVisitor implements FileVisitor<Path> {
private boolean shouldClean;

private final Set<String> pidSet = PidHelper.getJavaPids();
Expand Down Expand Up @@ -98,14 +102,19 @@ public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs)
terminated = true;
return FileVisitResult.TERMINATE;
}
if (cleanSelf && JFR_DIR_PATTERN.matcher(dir.getFileName().toString()).matches()) {
// do not delete JFR repository on 'self-cleanup' - it conflicts with the JFR's own cleanup
return FileVisitResult.SKIP_SUBTREE;
}

cleanupTestHook.preVisitDirectory(dir, attrs);

if (dir.equals(baseTempDir)) {
return FileVisitResult.CONTINUE;
}
String fileName = dir.getFileName().toString();
// the JFR repository directories are under <basedir>/pid_<pid>
String pid = fileName.startsWith("pid_") ? fileName.substring(4) : null;
String pid = fileName.startsWith(TEMPDIR_PREFIX) ? fileName.substring(4) : null;
boolean isSelfPid = pid != null && pid.equals(PidHelper.getPid());
shouldClean |= cleanSelf ? isSelfPid : !isSelfPid && !pidSet.contains(pid);
if (shouldClean) {
Expand Down Expand Up @@ -167,18 +176,43 @@ public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOEx
}
String fileName = dir.getFileName().toString();
// reset the flag only if we are done cleaning the top-level directory
shouldClean = !fileName.startsWith("pid_");
shouldClean = !fileName.startsWith(TEMPDIR_PREFIX);
}
return FileVisitResult.CONTINUE;
}
}

private final class CleanupTask implements Runnable {
private final CountDownLatch latch = new CountDownLatch(1);
private volatile Throwable throwable = null;

@Override
public void run() {
try {
cleanup(false);
} catch (OutOfMemoryError oom) {
throw oom;
} catch (Throwable t) {
throwable = t;
} finally {
latch.countDown();
}
}

boolean await(long timeout, TimeUnit unit) throws Throwable {
boolean ret = latch.await(timeout, unit);
if (throwable != null) {
throw throwable;
}
return ret;
}
}

private final Path baseTempDir;
private final Path tempDir;
private final long cutoffSeconds;

private final CompletableFuture<Void> cleanupTask;

private final CleanupTask cleanupTask = new CleanupTask();
private final CleanupHook cleanupTestHook;

/**
Expand All @@ -200,11 +234,7 @@ public static TempLocationManager getInstance() {
static TempLocationManager getInstance(boolean waitForCleanup) {
TempLocationManager instance = SingletonHolder.INSTANCE;
if (waitForCleanup) {
try {
instance.waitForCleanup(5, TimeUnit.SECONDS);
} catch (TimeoutException ignored) {

}
instance.waitForCleanup(5, TimeUnit.SECONDS);
}
return instance;
}
Expand All @@ -214,10 +244,11 @@ private TempLocationManager() {
}

TempLocationManager(ConfigProvider configProvider) {
this(configProvider, CleanupHook.EMPTY);
this(configProvider, true, CleanupHook.EMPTY);
}

TempLocationManager(ConfigProvider configProvider, CleanupHook testHook) {
TempLocationManager(
ConfigProvider configProvider, boolean runStartupCleanup, CleanupHook testHook) {
cleanupTestHook = testHook;

// In order to avoid racy attempts to clean up files which are currently being processed in a
Expand Down Expand Up @@ -255,21 +286,21 @@ private TempLocationManager() {
baseTempDir = configuredTempDir.resolve("ddprof");
baseTempDir.toFile().deleteOnExit();

tempDir = baseTempDir.resolve("pid_" + pid);
cleanupTask = CompletableFuture.runAsync(() -> cleanup(false));
tempDir = baseTempDir.resolve(TEMPDIR_PREFIX + pid);
if (runStartupCleanup) {
// do not execute the background cleanup task when running in tests
AgentTaskScheduler.INSTANCE.execute(() -> cleanup(false));
}

Thread selfCleanup =
new Thread(
() -> {
try {
waitForCleanup(1, TimeUnit.SECONDS);
} catch (TimeoutException e) {
if (!waitForCleanup(1, TimeUnit.SECONDS)) {
log.info(
"Cleanup task timed out. {} temp directory might not have been cleaned up properly",
tempDir);
} finally {
cleanup(true);
}
cleanup(true);
},
"Temp Location Manager Cleanup");
Runtime.getRuntime().addShutdownHook(selfCleanup);
Expand Down Expand Up @@ -344,6 +375,19 @@ boolean cleanup(boolean cleanSelf) {
*/
boolean cleanup(boolean cleanSelf, long timeout, TimeUnit unit) {
try {
if (!Files.exists(baseTempDir)) {
// not event the main temp location exists; nothing to clean up
return true;
}
try (Stream<Path> paths = Files.walk(baseTempDir)) {
if (paths.noneMatch(
path ->
Files.isDirectory(path)
&& path.getFileName().toString().startsWith(TEMPDIR_PREFIX))) {
// nothing to clean up; bail out early
return true;
}
}
cleanupTestHook.onCleanupStart(cleanSelf, timeout, unit);
CleanupVisitor visitor = new CleanupVisitor(cleanSelf, timeout, unit);
Files.walkFileTree(baseTempDir, visitor);
Expand All @@ -359,21 +403,24 @@ boolean cleanup(boolean cleanSelf, long timeout, TimeUnit unit) {
}

// accessible for tests
void waitForCleanup(long timeout, TimeUnit unit) throws TimeoutException {
boolean waitForCleanup(long timeout, TimeUnit unit) {
try {
cleanupTask.get(timeout, unit);
return cleanupTask.await(timeout, unit);
} catch (InterruptedException e) {
cleanupTask.cancel(true);
log.debug("Temp directory cleanup was interrupted");
Thread.currentThread().interrupt();
} catch (TimeoutException e) {
cleanupTask.cancel(true);
throw e;
} catch (ExecutionException e) {
} catch (Throwable t) {
if (log.isDebugEnabled()) {
log.debug("Failed to cleanup temp directory: {}", tempDir, e);
log.debug("Failed to cleanup temp directory: {}", tempDir, t);
} else {
log.debug("Failed to cleanup temp directory: {}", tempDir);
}
}
return false;
}

// accessible for tests
void createDirStructure() throws IOException {
Files.createDirectories(baseTempDir);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import java.util.UUID;
import java.util.concurrent.Phaser;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.locks.LockSupport;
import java.util.stream.Stream;
import org.junit.jupiter.api.Test;
Expand Down Expand Up @@ -173,7 +174,7 @@ public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOEx
}
};

TempLocationManager mgr = instance(baseDir, blocker);
TempLocationManager mgr = instance(baseDir, true, blocker);

// wait for the cleanup start
phaser.arriveAndAwaitAdvance();
Expand All @@ -187,8 +188,9 @@ public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOEx

@ParameterizedTest
@MethodSource("timeoutTestArguments")
void testCleanupWithTimeout(boolean selfCleanup, String section) throws Exception {
long timeoutMs = 500;
void testCleanupWithTimeout(boolean selfCleanup, boolean shouldSucceed, String section)
throws Exception {
long timeoutMs = 10;
TempLocationManager.CleanupHook delayer =
new TempLocationManager.CleanupHook() {
@Override
Expand Down Expand Up @@ -229,30 +231,63 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
Files.createTempDirectory(
"ddprof-test-",
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------")));
TempLocationManager instance = instance(baseDir, delayer);
boolean rslt = instance.cleanup(selfCleanup, timeoutMs, TimeUnit.MILLISECONDS);
assertTrue(rslt);
TempLocationManager instance = instance(baseDir, false, delayer);
Path mytempdir = instance.getTempDir();
Path otherTempdir = mytempdir.getParent().resolve("pid_0000");
Files.createDirectories(otherTempdir);
Files.createFile(mytempdir.resolve("dummy"));
Files.createFile(otherTempdir.resolve("dummy"));
boolean rslt =
instance.cleanup(
selfCleanup, (long) (timeoutMs * (shouldSucceed ? 10 : 0.5d)), TimeUnit.MILLISECONDS);
assertEquals(shouldSucceed, rslt);
}

@Test
void testShortCircuit() throws Exception {
Path baseDir =
Files.createTempDirectory(
"ddprof-test-",
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwx------")));
AtomicBoolean executed = new AtomicBoolean();
TempLocationManager.CleanupHook hook =
new TempLocationManager.CleanupHook() {
@Override
public void onCleanupStart(boolean selfCleanup, long timeout, TimeUnit unit) {
executed.set(true);
}
};
TempLocationManager instance = instance(baseDir, false, hook);

instance.createDirStructure();

boolean ret = instance.cleanup(false);
assertTrue(ret);
assertFalse(executed.get());
}

private static Stream<Arguments> timeoutTestArguments() {
List<Arguments> argumentsList = new ArrayList<>();
for (boolean selfCleanup : new boolean[] {true, false}) {
for (String intercepted :
new String[] {"preVisitDirectory", "visitFile", "postVisitDirectory"}) {
argumentsList.add(Arguments.of(selfCleanup, intercepted));
argumentsList.add(Arguments.of(selfCleanup, true, intercepted));
argumentsList.add(Arguments.of(selfCleanup, false, intercepted));
}
}
return argumentsList.stream();
}

private TempLocationManager instance(Path baseDir, TempLocationManager.CleanupHook cleanupHook)
private TempLocationManager instance(
Path baseDir, boolean withStartupCleanup, TempLocationManager.CleanupHook cleanupHook)
throws IOException {
Properties props = new Properties();
props.put(ProfilingConfig.PROFILING_TEMP_DIR, baseDir.toString());
props.put(
ProfilingConfig.PROFILING_UPLOAD_PERIOD,
"0"); // to force immediate cleanup; must be a string value!

return new TempLocationManager(ConfigProvider.withPropertiesOverride(props), cleanupHook);
return new TempLocationManager(
ConfigProvider.withPropertiesOverride(props), withStartupCleanup, cleanupHook);
}
}

0 comments on commit ec1744f

Please sign in to comment.