Skip to content

Commit 5661255

Browse files
authored
Merge pull request #2081 from shirou/fix/windows_use_get_active_processor_group_count
[cpu][windows]: fix percpu stats on Windows hosts with multiple processor groups
2 parents 2f3645f + ddec60e commit 5661255

3 files changed

Lines changed: 89 additions & 3 deletions

File tree

cpu/cpu_windows.go

Lines changed: 64 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ var (
2525
procGetLogicalProcessorInformationEx = common.Modkernel32.NewProc("GetLogicalProcessorInformationEx")
2626
procGetSystemFirmwareTable = common.Modkernel32.NewProc("GetSystemFirmwareTable")
2727
procCallNtPowerInformation = common.ModPowrProf.NewProc("CallNtPowerInformation")
28+
procGetActiveProcessorGroupCount = common.Modkernel32.NewProc("GetActiveProcessorGroupCount")
2829
)
2930

3031
type win32_Processor struct { //nolint:revive //FIXME
@@ -263,6 +264,21 @@ func perCPUTimes() ([]TimesStat, error) {
263264

264265
// makes call to Windows API function to retrieve performance information for each core
265266
func perfInfo() ([]win32_SystemProcessorPerformanceInformation, error) {
267+
// On hosts with more than 64 logical CPUs Windows splits CPUs into Processor Groups
268+
// (up to 64 logical CPUs per group). The non-Ex NtQuerySystemInformation only returns
269+
// data for the calling thread's group, so whenever the Ex variant is available we
270+
// iterate every active group and concatenate the results. See issue #887.
271+
if common.ProcNtQuerySystemInformationEx.Find() == nil {
272+
return perfInfoAllGroups()
273+
}
274+
return perfInfoSingleGroup()
275+
}
276+
277+
// perfInfoSingleGroup queries SystemProcessorPerformanceInformation via the non-Ex
278+
// NtQuerySystemInformation call. This is the legacy fallback for environments where
279+
// NtQuerySystemInformationEx cannot be resolved; it only returns data for the calling
280+
// thread's processor group.
281+
func perfInfoSingleGroup() ([]win32_SystemProcessorPerformanceInformation, error) {
266282
// Make maxResults large for safety.
267283
// We can't invoke the api call with a results array that's too small.
268284
// If we have more than 2056 cores on a single host, then it's probably the future.
@@ -286,16 +302,61 @@ func perfInfo() ([]win32_SystemProcessorPerformanceInformation, error) {
286302

287303
// check return code for errors
288304
if retCode != 0 {
289-
return nil, fmt.Errorf("call to NtQuerySystemInformation returned %d. err: %s", retCode, err.Error())
305+
return nil, fmt.Errorf("call to NtQuerySystemInformation returned 0x%x: %w", retCode, err)
290306
}
291307

292308
// calculate the number of returned elements based on the returned size
293309
numReturnedElements := retSize / win32_SystemProcessorPerformanceInfoSize
294310

295311
// trim results to the number of returned elements
296-
resultBuffer = resultBuffer[:numReturnedElements]
312+
return resultBuffer[:numReturnedElements], nil
313+
}
297314

298-
return resultBuffer, nil
315+
// perfInfoAllGroups queries SystemProcessorPerformanceInformation for every active
316+
// processor group via NtQuerySystemInformationEx and concatenates the results. The
317+
// group index is passed as the InputBuffer per the Ex calling convention documented at
318+
// https://www.geoffchappell.com/studies/windows/km/ntoskrnl/api/ex/sysinfo/queryex.htm
319+
func perfInfoAllGroups() ([]win32_SystemProcessorPerformanceInformation, error) {
320+
// GetActiveProcessorGroupCount returns 0 only on failure; propagate the error
321+
// rather than silently defaulting to a single group and returning partial data.
322+
r, _, callErr := procGetActiveProcessorGroupCount.Call()
323+
if r == 0 {
324+
return nil, fmt.Errorf("GetActiveProcessorGroupCount returned 0: %w", callErr)
325+
}
326+
groupCount := uint16(r)
327+
328+
var result []win32_SystemProcessorPerformanceInformation
329+
for g := uint16(0); g < groupCount; g++ {
330+
numLP := windows.GetActiveProcessorCount(g)
331+
if numLP == 0 {
332+
return nil, fmt.Errorf("GetActiveProcessorCount returned 0 for processor group %d", g)
333+
}
334+
// buffer sized exactly for this group's logical CPU count
335+
buf := make([]win32_SystemProcessorPerformanceInformation, numLP)
336+
bufSize := uintptr(win32_SystemProcessorPerformanceInfoSize) * uintptr(numLP)
337+
var retSize uint32
338+
// InputBuffer is a USHORT (2 bytes) holding the target processor group index.
339+
group := g
340+
retCode, _, err := common.ProcNtQuerySystemInformationEx.Call(
341+
win32_SystemProcessorPerformanceInformationClass, // System Information Class -> SystemProcessorPerformanceInformation
342+
uintptr(unsafe.Pointer(&group)), // InputBuffer: pointer to USHORT group index
343+
unsafe.Sizeof(group), // InputBufferLength: sizeof(USHORT) = 2
344+
uintptr(unsafe.Pointer(&buf[0])), // pointer to first element in result buffer
345+
bufSize, // size of the buffer in memory
346+
uintptr(unsafe.Pointer(&retSize)), // pointer to the size of the returned results the windows proc will set this
347+
)
348+
if retCode != 0 {
349+
return nil, fmt.Errorf("call to NtQuerySystemInformationEx(group=%d) returned 0x%x: %w", g, retCode, err)
350+
}
351+
// Guard against a retSize that is not a whole number of entries or exceeds
352+
// the allocated buffer (e.g. CPU hot-add racing with GetActiveProcessorCount).
353+
if retSize%win32_SystemProcessorPerformanceInfoSize != 0 || uintptr(retSize) > bufSize {
354+
return nil, fmt.Errorf("NtQuerySystemInformationEx(group=%d) returned unexpected retSize=%d (bufSize=%d)", g, retSize, bufSize)
355+
}
356+
n := retSize / win32_SystemProcessorPerformanceInfoSize
357+
result = append(result, buf[:n]...)
358+
}
359+
return result, nil
299360
}
300361

301362
// SystemInfo is an equivalent representation of SYSTEM_INFO in the Windows API.

cpu/cpu_windows_test.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// SPDX-License-Identifier: BSD-3-Clause
2+
//go:build windows
3+
4+
package cpu
5+
6+
import (
7+
"context"
8+
"testing"
9+
10+
"github.com/stretchr/testify/require"
11+
)
12+
13+
// TestPerfInfoMatchesLogicalCount ensures perfInfo() returns one entry per logical
14+
// CPU on the host. This guards against regressions like issue #887 where only the
15+
// calling thread's processor group was reported on hosts with more than 64 CPUs.
16+
func TestPerfInfoMatchesLogicalCount(t *testing.T) {
17+
info, err := perfInfo()
18+
require.NoError(t, err)
19+
20+
n, err := CountsWithContext(context.Background(), true)
21+
require.NoError(t, err)
22+
23+
require.Len(t, info, n, "perfInfo must return one entry per logical CPU across all processor groups")
24+
}

internal/common/common_windows.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ var (
7373

7474
ProcGetSystemTimes = Modkernel32.NewProc("GetSystemTimes")
7575
ProcNtQuerySystemInformation = ModNt.NewProc("NtQuerySystemInformation")
76+
ProcNtQuerySystemInformationEx = ModNt.NewProc("NtQuerySystemInformationEx")
7677
ProcRtlGetNativeSystemInformation = ModNt.NewProc("RtlGetNativeSystemInformation")
7778
ProcRtlNtStatusToDosError = ModNt.NewProc("RtlNtStatusToDosError")
7879
ProcNtQueryInformationProcess = ModNt.NewProc("NtQueryInformationProcess")

0 commit comments

Comments
 (0)