Skip to content

Commit 29fc754

Browse files
committed
pstack: improve generation of DSO ranges
We generate a single address range for each DSO in the userspace program. However, the method is a bit convoluted: we select the largest "vm_end" value, and we use a start address computed from the start of the executable VMA and its vm_pgoff. The result of this is that, if a DSO file is mapped multiple times in different locations, we may produce an address range that spans other DSO address ranges. This is exactly what happened in osandov/drgn#574. Simplify this by just using the start address of the lowest VMA (the one which has vm_pgoff==0), and computing the end address by taking the file size, or the start of the next DSO address range, whichever is lower. This ensures there are no address range overlaps. It does not provide drgn with any additional information about the file address mappings, but as far as I can tell, that's just fine: drgn can't use multiple address ranges, because it doesn't know which file offsets they correspond to. To ensure this behavior is used by both the "dump" and "pstack" modes, implement factor out the DSO range generation code into a helper and use it in both places. Signed-off-by: Stephen Brennan <stephen.s.brennan@oracle.com>
1 parent fed5f45 commit 29fc754

File tree

1 file changed

+55
-53
lines changed

1 file changed

+55
-53
lines changed

drgn_tools/pstack.py

Lines changed: 55 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -269,30 +269,52 @@ def add_task_args(group: argparse.ArgumentParser) -> None:
269269
)
270270

271271

272+
def task_dsos(mm: Object) -> List[Tuple[str, int, int, int]]:
273+
"""
274+
Return the mapped DSOs for a task's ``mm_struct``. The return value is a
275+
tuple: (path, start, end, ino)
276+
"""
277+
# For the first pass, find any mapping which starts at offset 0 within the
278+
# file, and record the full range of the file (even if it's not actually
279+
# mapped for its full size). Also, record which file mappings have an
280+
# executable VMA, since we'll only care about those.
281+
file_range: Dict[str, Tuple[int, int, int]] = {}
282+
file_exec = set()
283+
VM_EXEC = 0x4
284+
for vma in for_each_vma(mm):
285+
if not vma.vm_file:
286+
continue
287+
path = os.fsdecode(d_path(vma.vm_file.f_path))
288+
if vma.vm_pgoff == 0:
289+
file_range[path] = (
290+
vma.vm_start.value_(),
291+
(vma.vm_start + vma.vm_file.f_inode.i_size).value_(),
292+
vma.vm_file.f_inode.i_ino.value_(),
293+
)
294+
if vma.vm_flags & VM_EXEC:
295+
file_exec.add(path)
296+
297+
# For the second pass, ensure there is no overlap between the ranges we
298+
# created previously. Since the whole file is not necessarily mapped, we
299+
# want to avoid creating overlapping ranges that drgn is not expecting (see
300+
# https://github.com/osandov/drgn/issues/574). Once we've ensured there are
301+
# no overlaps, and the file had an executable mapping, we can emit it.
302+
ranges = sorted(file_range.items(), key=lambda t: t[1][0])
303+
result = []
304+
for i, (path, (start, end, ino)) in enumerate(ranges):
305+
if i + 1 < len(ranges):
306+
end = min(end, ranges[i + 1][1][1])
307+
if path in file_exec:
308+
result.append((path, start, end, ino))
309+
return result
310+
311+
272312
def task_metadata(prog: Program, task: Object) -> Dict[str, Any]:
273313
"""Return JSON metadata about a task, for later use."""
274-
VM_EXEC = 0x4
275314
load_addrs = {}
276-
file_vm_end: Dict[str, int] = {}
277315
if task.mm:
278-
for vma in for_each_vma(task.mm):
279-
if not vma.vm_file:
280-
continue
281-
path = os.fsdecode(d_path(vma.vm_file.f_path))
282-
file_vm_end[path] = max(
283-
vma.vm_end.value_(),
284-
file_vm_end.get(path, 0),
285-
)
286-
if vma.vm_flags & VM_EXEC and vma.vm_file:
287-
file_start = (
288-
vma.vm_start - vma.vm_pgoff * task.mm.prog_["PAGE_SIZE"]
289-
).value_()
290-
inode = vma.vm_file.f_inode.i_ino.value_()
291-
load_addrs[path] = [file_start, vma.vm_end.value_(), inode]
292-
293-
# use the largest vm_end we find for the end of the address range
294-
for path in load_addrs:
295-
load_addrs[path][1] = file_vm_end[path]
316+
for path, start, end, inode in task_dsos(task.mm):
317+
load_addrs[path] = [start, end, inode]
296318
return {
297319
"pid": task.pid.value_(),
298320
"comm": task.comm.string_().decode("utf-8", errors="replace"),
@@ -709,39 +731,19 @@ def read_fn(_, count, offset, __):
709731

710732
up.add_memory_segment(0, 0xFFFFFFFFFFFFFFFF, read_fn, False)
711733

712-
# Do one pass where we record the maximum extent of the mapping for each
713-
# file, and we also detect each executable mapping, for which we prepare
714-
# modules.
715-
file_vm_end: Dict[str, int] = {}
716-
VM_EXEC = 0x4
717-
for vma in for_each_vma(mm):
718-
if vma.vm_file:
719-
path = os.fsdecode(d_path(vma.vm_file.f_path))
720-
file_vm_end[path] = max(
721-
vma.vm_end.value_(), file_vm_end.get(path, 0)
722-
)
723-
if vma.vm_flags & VM_EXEC and vma.vm_file:
724-
try:
725-
statbuf = os.stat(path)
726-
if statbuf.st_ino != vma.vm_file.f_inode.i_ino.value_():
727-
log.warning(
728-
"file %s doesn't match the inode on-disk, it may"
729-
" have been updated",
730-
path,
731-
)
732-
except OSError:
733-
pass
734-
file_start = (
735-
vma.vm_start - vma.vm_pgoff * mm.prog_["PAGE_SIZE"]
736-
).value_()
737-
mod = up.extra_module(path, create=True)
738-
mod.address_range = (file_start, vma.vm_end.value_())
739-
740-
# Now set the address ranges based on the observed file end, then load the
741-
# ELF files.
742-
for mod in up.modules():
743-
path = mod.name
744-
mod.address_range = (mod.address_range[0], file_vm_end[path])
734+
for path, start, end, ino in task_dsos(mm):
735+
try:
736+
statbuf = os.stat(path)
737+
if statbuf.st_ino != ino:
738+
log.warning(
739+
"file %s doesn't match the inode on-disk, it may"
740+
" have been updated",
741+
path,
742+
)
743+
except OSError:
744+
pass
745+
mod = up.extra_module(path, create=True)
746+
mod.address_range = (start, end)
745747
mod.try_file(path)
746748

747749
return up

0 commit comments

Comments
 (0)