diff --git a/.chloggen/hostmetricsreceiver-process-scraper-windows-optimization.yaml b/.chloggen/hostmetricsreceiver-process-scraper-windows-optimization.yaml new file mode 100644 index 0000000000000..15b95480d1dd2 --- /dev/null +++ b/.chloggen/hostmetricsreceiver-process-scraper-windows-optimization.yaml @@ -0,0 +1,29 @@ +# Use this changelog template to create an entry for release notes. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: 'enhancement' + +# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver) +component: hostmetricsreceiver + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: > + Reduced the cost of retrieving number of threads and parent process ID on Windows. + Disable the featuregate `hostmetrics.process.onWindowsUseNewGetProcesses` to fallback to the previous implementation. + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +issues: [32947, 38589] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. +subtext: + +# If your change doesn't affect end users or the exported elements of any package, +# you should instead start your pull request title with [chore] or use the "Skip Changelog" label. +# Optional: The change log or logs in which this entry should be included. +# e.g. '[user]' or '[user, api]' +# Include 'user' if the change is relevant to end users. +# Include 'api' if there is a change to a library API. +# Default: '[user]' +change_logs: [user] diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/get_process_handles_others.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/get_process_handles_others.go new file mode 100644 index 0000000000000..ac3b3ee0ea131 --- /dev/null +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/get_process_handles_others.go @@ -0,0 +1,27 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +//go:build !windows + +package processscraper // import "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/hostmetricsreceiver/internal/scraper/processscraper" + +import ( + "context" + + "github.com/shirou/gopsutil/v4/process" +) + +func getGopsutilProcessHandles(ctx context.Context) (processHandles, error) { + processes, err := process.ProcessesWithContext(ctx) + if err != nil { + return nil, err + } + wrapped := make([]wrappedProcessHandle, len(processes)) + for i, p := range processes { + wrapped[i] = wrappedProcessHandle{ + Process: p, + } + } + + return &gopsProcessHandles{handles: wrapped}, nil +} diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/get_process_handles_windows.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/get_process_handles_windows.go new file mode 100644 index 0000000000000..8051e2a826293 --- /dev/null +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/get_process_handles_windows.go @@ -0,0 +1,84 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +//go:build windows + +package processscraper // import "github.com/open-telemetry/opentelemetry-collector-contrib/receiver/hostmetricsreceiver/internal/scraper/processscraper" + +import ( + "context" + "fmt" + "unsafe" + + "github.com/shirou/gopsutil/v4/process" + "go.opentelemetry.io/collector/featuregate" + "golang.org/x/sys/windows" +) + +var useNewGetProcessHandles = featuregate.GlobalRegistry().MustRegister( + "hostmetrics.process.onWindowsUseNewGetProcesses", + featuregate.StageBeta, + featuregate.WithRegisterDescription("If disabled, the scraper will use the legacy implementation to retrieve process handles."), +) + +func getGopsutilProcessHandles(ctx context.Context) (processHandles, error) { + if !useNewGetProcessHandles.IsEnabled() { + return getGopsutilProcessHandlesLegacy(ctx) + } + + snap, err := windows.CreateToolhelp32Snapshot(windows.TH32CS_SNAPPROCESS, 0) + if err != nil { + return nil, fmt.Errorf("could not create snapshot: %w", err) + } + defer func() { + _ = windows.CloseHandle(snap) + }() + + var pe32 windows.ProcessEntry32 + pe32.Size = uint32(unsafe.Sizeof(pe32)) + if err = windows.Process32First(snap, &pe32); err != nil { + return nil, fmt.Errorf("could not get first process: %w", err) + } + + wrappedProcesses := make([]wrappedProcessHandle, 0, 64) + for { + select { + case <-ctx.Done(): + return nil, ctx.Err() + default: + // Ignoring any errors here to keep same behavior as the legacy implementation + // based on the `process.ProcessesWithContext` from the `gopsutil` package. + p, _ := process.NewProcess(int32(pe32.ProcessID)) + if p != nil { + wrappedProcess := wrappedProcessHandle{ + Process: p, + parentPid: int32(pe32.ParentProcessID), + initialNumThreads: int32(pe32.Threads), + flags: flagParentPidSet | flagUseInitialNumThreadsOnce, + } + wrappedProcesses = append(wrappedProcesses, wrappedProcess) + } + } + + if err = windows.Process32Next(snap, &pe32); err != nil { + break + } + } + + return &gopsProcessHandles{handles: wrappedProcesses}, nil +} + +func getGopsutilProcessHandlesLegacy(ctx context.Context) (processHandles, error) { + processes, err := process.ProcessesWithContext(ctx) + if err != nil { + return nil, err + } + wrapped := make([]wrappedProcessHandle, len(processes)) + for i, p := range processes { + wrapped[i] = wrappedProcessHandle{ + Process: p, + } + } + + return &gopsProcessHandles{handles: wrapped}, nil +} diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/process.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/process.go index 69d02d9f78d72..313f5917aecdd 100644 --- a/receiver/hostmetricsreceiver/internal/scraper/processscraper/process.go +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/process.go @@ -109,18 +109,26 @@ func (p *gopsProcessHandles) Pid(index int) int32 { } func (p *gopsProcessHandles) At(index int) processHandle { - return p.handles[index] + return &(p.handles[index]) } func (p *gopsProcessHandles) Len() int { return len(p.handles) } +const ( + flagParentPidSet = 1 << 0 + flagUseInitialNumThreadsOnce = 1 << 1 +) + type wrappedProcessHandle struct { *process.Process + parentPid int32 + initialNumThreads int32 + flags uint8 // bitfield to track if fields are set } -func (p wrappedProcessHandle) CgroupWithContext(ctx context.Context) (string, error) { +func (p *wrappedProcessHandle) CgroupWithContext(ctx context.Context) (string, error) { pid := p.Process.Pid statPath := getEnvWithContext(ctx, string(common.HostProcEnvKey), "/proc", strconv.Itoa(int(pid)), "cgroup") contents, err := os.ReadFile(statPath) @@ -131,6 +139,36 @@ func (p wrappedProcessHandle) CgroupWithContext(ctx context.Context) (string, er return strings.TrimSuffix(string(contents), "\n"), nil } +func (p *wrappedProcessHandle) PpidWithContext(ctx context.Context) (int32, error) { + if p.flags&flagParentPidSet != 0 { + return p.parentPid, nil + } + + parentPid, err := p.Process.PpidWithContext(ctx) + if err != nil { + return 0, err + } + + p.parentPid = parentPid + p.flags |= flagParentPidSet + return parentPid, nil +} + +func (p *wrappedProcessHandle) NumThreadsWithContext(ctx context.Context) (int32, error) { + if p.flags&flagUseInitialNumThreadsOnce != 0 { + // The number of threads can fluctuate so use the initially cached value only the first time. + p.flags &^= flagUseInitialNumThreadsOnce + return p.initialNumThreads, nil + } + + numThreads, err := p.Process.NumThreadsWithContext(ctx) + if err != nil { + return 0, err + } + + return numThreads, nil +} + // copied from gopsutil: // GetEnvWithContext retrieves the environment variable key. If it does not exist it returns the default. // The context may optionally contain a map superseding os.EnvKey. @@ -150,19 +188,6 @@ func getEnvWithContext(ctx context.Context, key string, dfault string, combineWi return filepath.Join(segments...) } -func getProcessHandlesInternal(ctx context.Context) (processHandles, error) { - processes, err := process.ProcessesWithContext(ctx) - if err != nil { - return nil, err - } - wrapped := make([]wrappedProcessHandle, len(processes)) - for i, p := range processes { - wrapped[i] = wrappedProcessHandle{Process: p} - } - - return &gopsProcessHandles{handles: wrapped}, nil -} - func parentPid(ctx context.Context, handle processHandle, pid int32) (int32, error) { // special case for pid 0 and pid 1 in darwin if pid == 0 || (pid == 1 && runtime.GOOS == "darwin") { diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_metadata_benchmark_test.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_metadata_benchmark_test.go new file mode 100644 index 0000000000000..82c259ee14166 --- /dev/null +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_metadata_benchmark_test.go @@ -0,0 +1,65 @@ +// Copyright The OpenTelemetry Authors +// SPDX-License-Identifier: Apache-2.0 + +//go:build windows + +package processscraper + +import ( + "context" + "testing" + + "github.com/stretchr/testify/require" + "go.opentelemetry.io/collector/featuregate" + "go.opentelemetry.io/collector/scraper" +) + +func BenchmarkGetProcessMetadata(b *testing.B) { + ctx := context.Background() + config := &Config{} + + scraper, err := newProcessScraper(scraper.Settings{}, config) + if err != nil { + b.Fatalf("Failed to create process scraper: %v", err) + } + + benchmarks := []struct { + name string + useLegacy bool + parentPidEnabled bool + }{ + { + name: "New-ExcludeParentPid", + }, + { + name: "Old-ExcludeParentPid", + useLegacy: true, + }, + { + name: "New-IncludeParentPid", + parentPidEnabled: true, + }, + { + name: "Old-IncludeParentPid", + parentPidEnabled: true, + useLegacy: true, + }, + } + + for _, bm := range benchmarks { + b.Run(bm.name, func(b *testing.B) { + // Set feature gate value + previousValue := useNewGetProcessHandles.IsEnabled() + require.NoError(b, featuregate.GlobalRegistry().Set(useNewGetProcessHandles.ID(), !bm.useLegacy)) + defer func() { + require.NoError(b, featuregate.GlobalRegistry().Set(useNewGetProcessHandles.ID(), previousValue)) + }() + scraper.config.MetricsBuilderConfig.ResourceAttributes.ProcessParentPid.Enabled = bm.parentPidEnabled + + for i := 0; i < b.N; i++ { + // Typically there are errors, but we are not interested in them for this benchmark + _, _ = scraper.getProcessMetadata(ctx) + } + }) + } +} diff --git a/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go b/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go index 5671d449ea62a..d5495835216ca 100644 --- a/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go +++ b/receiver/hostmetricsreceiver/internal/scraper/processscraper/process_scraper.go @@ -66,7 +66,7 @@ func newProcessScraper(settings scraper.Settings, cfg *Config) (*processScraper, settings: settings, config: cfg, getProcessCreateTime: processHandle.CreateTimeWithContext, - getProcessHandles: getProcessHandlesInternal, + getProcessHandles: getGopsutilProcessHandles, scrapeProcessDelay: cfg.ScrapeProcessDelay, ucals: make(map[int32]*ucal.CPUUtilizationCalculator), handleCountManager: handlecount.NewManager(), @@ -271,14 +271,17 @@ func (s *processScraper) getProcessMetadata(ctx context.Context) ([]*processMeta continue } - parentPid, err := parentPid(ctx, handle, pid) - if err != nil { - errs.AddPartial(0, fmt.Errorf("error reading parent pid for process %q (pid %v): %w", executable.name, pid, err)) + parentProcessID := int32(0) + if s.config.MetricsBuilderConfig.ResourceAttributes.ProcessParentPid.Enabled { + parentProcessID, err = parentPid(ctx, handle, pid) + if err != nil { + errs.AddPartial(0, fmt.Errorf("error reading parent pid for process %q (pid %v): %w", executable.name, pid, err)) + } } md := &processMetadata{ pid: pid, - parentPid: parentPid, + parentPid: parentProcessID, executable: executable, command: command, username: username,