Skip to content

Commit 5aa9438

Browse files
philrhcPhil Cummins
andauthored
prelinking (#107)
* adds prelink.rs
* qualify freestanding function defined in another module
---------
Co-authored-by: Phil Cummins <philip.cummins@bsc.es>
1 parent 82afee4 commit 5aa9438

File tree

2 files changed

+273
-264
lines changed

2 files changed

+273
-264
lines changed

src/lib.rs

Lines changed: 31 additions & 264 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use {
1010
indexmap::{IndexMap, IndexSet},
1111
serde::Deserialize,
1212
std::{
13-
collections::{HashMap, HashSet},
13+
collections::HashMap,
1414
env, fs,
1515
io::Cursor,
1616
iter,
@@ -19,7 +19,6 @@ use {
1919
str,
2020
},
2121
summary::{Escape, Locations, Summary},
22-
tar::Archive,
2322
wasm_convert::IntoValType,
2423
wasm_encoder::{
2524
CodeSection, ExportKind, ExportSection, Function, FunctionSection, Instruction, Module,
@@ -35,22 +34,20 @@ use {
3534
DirPerms, FilePerms, WasiCtx, WasiCtxBuilder, WasiView,
3635
},
3736
wit_parser::{Resolve, TypeDefKind, UnresolvedPackageGroup, WorldId, WorldItem, WorldKey},
38-
zstd::Decoder,
3937
};
4038

4139
mod abi;
4240
mod bindgen;
4341
mod bindings;
4442
pub mod command;
43+
mod prelink;
4544
#[cfg(feature = "pyo3")]
4645
mod python;
4746
mod summary;
4847
#[cfg(test)]
4948
mod test;
5049
mod util;
5150

52-
static NATIVE_EXTENSION_SUFFIX: &str = ".cpython-312-wasm32-wasi.so";
53-
5451
wasmtime::component::bindgen!({
5552
path: "wit",
5653
world: "init",
@@ -62,6 +59,12 @@ pub struct Ctx {
6259
table: ResourceTable,
6360
}
6461

62+
pub struct Library {
63+
name: String,
64+
module: Vec<u8>,
65+
dl_openable: bool,
66+
}
67+
6568
impl WasiView for Ctx {
6669
fn ctx(&mut self) -> &mut WasiCtx {
6770
&mut self.wasi
@@ -104,7 +107,7 @@ impl TryFrom<(&Path, RawComponentizePyConfig)> for ComponentizePyConfig {
104107
}
105108

106109
#[derive(Debug)]
107-
struct ConfigContext<T> {
110+
pub struct ConfigContext<T> {
108111
module: String,
109112
root: PathBuf,
110113
path: PathBuf,
@@ -207,85 +210,14 @@ pub async fn componentize(
207210
.filter_map(|&s| Path::new(s).exists().then_some(s))
208211
.collect::<Vec<_>>();
209212

210-
// Untar the embedded copy of the Python standard library into a temporary directory
211-
let stdlib = tempfile::tempdir()?;
212-
213-
Archive::new(Decoder::new(Cursor::new(include_bytes!(concat!(
214-
env!("OUT_DIR"),
215-
"/python-lib.tar.zst"
216-
))))?)
217-
.unpack(stdlib.path())?;
218-
219-
// Untar the embedded copy of helper utilities into a temporary directory
220-
let bundled = tempfile::tempdir()?;
221-
222-
Archive::new(Decoder::new(Cursor::new(include_bytes!(concat!(
223-
env!("OUT_DIR"),
224-
"/bundled.tar.zst"
225-
))))?)
226-
.unpack(bundled.path())?;
227-
228-
// Search `python_path` for native extension libraries and/or componentize-py.toml files. Packages containing
229-
// the latter may contain their own WIT files defining their own worlds (in addition to what the caller
230-
// specified as parameters), which we'll try to match up with `module_worlds` in the next step.
231-
let mut raw_configs = Vec::new();
232-
let mut library_path = Vec::with_capacity(python_path.len());
233-
for path in python_path {
234-
let mut libraries = Vec::new();
235-
search_directory(
236-
Path::new(path),
237-
Path::new(path),
238-
&mut libraries,
239-
&mut raw_configs,
240-
&mut HashSet::new(),
241-
)?;
242-
library_path.push((*path, libraries));
243-
}
244-
245-
// Validate the paths parsed from any componentize-py.toml files discovered above and match them up with
246-
// `module_worlds` entries. Note that we use an `IndexMap` to preserve the order specified in `module_worlds`,
247-
// which is required to be topologically sorted with respect to package dependencies.
248-
//
249-
// For any packages which contain componentize-py.toml files but no corresponding `module_worlds` entry, we use
250-
// the `world` parameter as a default.
251-
let configs = {
252-
let mut configs = raw_configs
253-
.into_iter()
254-
.map(|raw_config| {
255-
let config =
256-
ComponentizePyConfig::try_from((raw_config.path.deref(), raw_config.config))?;
257-
258-
Ok((
259-
raw_config.module.clone(),
260-
ConfigContext {
261-
module: raw_config.module,
262-
root: raw_config.root,
263-
path: raw_config.path,
264-
config,
265-
},
266-
))
267-
})
268-
.collect::<Result<HashMap<_, _>>>()?;
269-
270-
let mut ordered = IndexMap::new();
271-
for (module, world) in module_worlds {
272-
if let Some(config) = configs.remove(*module) {
273-
ordered.insert((*module).to_owned(), (config, Some(*world)));
274-
} else {
275-
bail!("no `componentize-py.toml` file found for module `{module}`");
276-
}
277-
}
213+
let embedded_python_standard_lib = prelink::embedded_python_standard_library()?;
214+
let embedded_helper_utils = prelink::embedded_helper_utils()?;
278215

279-
for (module, config) in configs {
280-
ordered.insert(module, (config, world));
281-
}
282-
283-
ordered
284-
};
216+
let (configs, mut libraries) =
217+
prelink::search_for_libraries_and_configs(python_path, module_worlds, world)?;
285218

286219
// Next, iterate over all the WIT directories, merging them into a single `Resolve`, and matching Python
287220
// packages to `WorldId`s.
288-
289221
let (mut resolve, mut main_world) = if let Some(path) = wit_path {
290222
let (resolve, world) = parse_wit(path, world)?;
291223
(Some(resolve), Some(world))
@@ -341,108 +273,11 @@ pub async fn componentize(
341273

342274
let summary = Summary::try_new(&resolve, &worlds)?;
343275

344-
struct Library {
345-
name: String,
346-
module: Vec<u8>,
347-
dl_openable: bool,
348-
}
349-
350-
let mut libraries = vec![
351-
Library {
352-
name: "libcomponentize_py_runtime.so".into(),
353-
module: zstd::decode_all(Cursor::new(include_bytes!(concat!(
354-
env!("OUT_DIR"),
355-
"/libcomponentize_py_runtime.so.zst"
356-
))))?,
357-
dl_openable: false,
358-
},
359-
Library {
360-
name: "libpython3.12.so".into(),
361-
module: zstd::decode_all(Cursor::new(include_bytes!(concat!(
362-
env!("OUT_DIR"),
363-
"/libpython3.12.so.zst"
364-
))))?,
365-
dl_openable: false,
366-
},
367-
Library {
368-
name: "libc.so".into(),
369-
module: zstd::decode_all(Cursor::new(include_bytes!(concat!(
370-
env!("OUT_DIR"),
371-
"/libc.so.zst"
372-
))))?,
373-
dl_openable: false,
374-
},
375-
Library {
376-
name: "libwasi-emulated-mman.so".into(),
377-
module: zstd::decode_all(Cursor::new(include_bytes!(concat!(
378-
env!("OUT_DIR"),
379-
"/libwasi-emulated-mman.so.zst"
380-
))))?,
381-
dl_openable: false,
382-
},
383-
Library {
384-
name: "libwasi-emulated-process-clocks.so".into(),
385-
module: zstd::decode_all(Cursor::new(include_bytes!(concat!(
386-
env!("OUT_DIR"),
387-
"/libwasi-emulated-process-clocks.so.zst"
388-
))))?,
389-
dl_openable: false,
390-
},
391-
Library {
392-
name: "libwasi-emulated-getpid.so".into(),
393-
module: zstd::decode_all(Cursor::new(include_bytes!(concat!(
394-
env!("OUT_DIR"),
395-
"/libwasi-emulated-getpid.so.zst"
396-
))))?,
397-
dl_openable: false,
398-
},
399-
Library {
400-
name: "libwasi-emulated-signal.so".into(),
401-
module: zstd::decode_all(Cursor::new(include_bytes!(concat!(
402-
env!("OUT_DIR"),
403-
"/libwasi-emulated-signal.so.zst"
404-
))))?,
405-
dl_openable: false,
406-
},
407-
Library {
408-
name: "libc++.so".into(),
409-
module: zstd::decode_all(Cursor::new(include_bytes!(concat!(
410-
env!("OUT_DIR"),
411-
"/libc++.so.zst"
412-
))))?,
413-
dl_openable: false,
414-
},
415-
Library {
416-
name: "libc++abi.so".into(),
417-
module: zstd::decode_all(Cursor::new(include_bytes!(concat!(
418-
env!("OUT_DIR"),
419-
"/libc++abi.so.zst"
420-
))))?,
421-
dl_openable: false,
422-
},
423-
Library {
424-
name: "libcomponentize_py_bindings.so".into(),
425-
module: bindings::make_bindings(&resolve, &worlds, &summary)?,
426-
dl_openable: false,
427-
},
428-
];
429-
430-
for (index, (path, libs)) in library_path.iter().enumerate() {
431-
for library in libs {
432-
let path = library
433-
.strip_prefix(path)
434-
.unwrap()
435-
.to_str()
436-
.context("non-UTF-8 path")?
437-
.replace('\\', "/");
438-
439-
libraries.push(Library {
440-
name: format!("/{index}/{path}"),
441-
module: fs::read(library)?,
442-
dl_openable: true,
443-
});
444-
}
445-
}
276+
libraries.push(Library {
277+
name: "libcomponentize_py_bindings.so".into(),
278+
module: bindings::make_bindings(&resolve, &worlds, &summary)?,
279+
dl_openable: false,
280+
});
446281

447282
// Link all the libraries (including any native extensions) into a single component.
448283
let mut linker = wit_component::Linker::default().validate(true);
@@ -534,8 +369,18 @@ pub async fn componentize(
534369
.env("PYTHONUNBUFFERED", "1")
535370
.env("COMPONENTIZE_PY_APP_NAME", app_name)
536371
.env("PYTHONHOME", "/python")
537-
.preopened_dir(stdlib.path(), "python", DirPerms::all(), FilePerms::all())?
538-
.preopened_dir(bundled.path(), "bundled", DirPerms::all(), FilePerms::all())?;
372+
.preopened_dir(
373+
embedded_python_standard_lib.path(),
374+
"python",
375+
DirPerms::all(),
376+
FilePerms::all(),
377+
)?
378+
.preopened_dir(
379+
embedded_helper_utils.path(),
380+
"bundled",
381+
DirPerms::all(),
382+
FilePerms::all(),
383+
)?;
539384

540385
// Generate guest mounts for each host directory in `python_path`.
541386
for (index, path) in python_path.iter().enumerate() {
@@ -628,7 +473,7 @@ pub async fn componentize(
628473

629474
Ok(())
630475
}
631-
replace(bundled.path(), "proxy", &module)?;
476+
replace(embedded_helper_utils.path(), "proxy", &module)?;
632477
};
633478

634479
for (mounts, world_dir) in world_dir_mounts.iter() {
@@ -828,84 +673,6 @@ fn add_wasi_and_stubs(
828673
Ok(())
829674
}
830675

831-
fn search_directory(
832-
root: &Path,
833-
path: &Path,
834-
libraries: &mut Vec<PathBuf>,
835-
configs: &mut Vec<ConfigContext<RawComponentizePyConfig>>,
836-
modules_seen: &mut HashSet<String>,
837-
) -> Result<()> {
838-
if path.is_dir() {
839-
for entry in fs::read_dir(path)? {
840-
search_directory(root, &entry?.path(), libraries, configs, modules_seen)?;
841-
}
842-
} else if let Some(name) = path.file_name().and_then(|name| name.to_str()) {
843-
if name.ends_with(NATIVE_EXTENSION_SUFFIX) {
844-
libraries.push(path.to_owned());
845-
} else if name == "componentize-py.toml" {
846-
let root = root.canonicalize()?;
847-
let path = path.canonicalize()?;
848-
849-
let module = module_name(&root, &path)
850-
.ok_or_else(|| anyhow!("unable to determine module name for {}", path.display()))?;
851-
852-
let mut push = true;
853-
for existing in &mut *configs {
854-
if path == existing.path.join("componentize-py.toml") {
855-
// When one directory in `PYTHON_PATH` is a subdirectory of the other, we consider the
856-
// subdirectory to be the true owner of the file. This is important later, when we derive a
857-
// package name by stripping the root directory from the file path.
858-
if root > existing.root {
859-
module.clone_into(&mut existing.module);
860-
root.clone_into(&mut existing.root);
861-
path.parent().unwrap().clone_into(&mut existing.path);
862-
}
863-
push = false;
864-
break;
865-
} else {
866-
// If we find a componentize-py.toml file under a Python module which will not be used because
867-
// we already found a version of that module in an earlier `PYTHON_PATH` directory, we'll
868-
// ignore the latest one.
869-
//
870-
// For example, if the module `foo_sdk` appears twice in `PYTHON_PATH`, and both versions have
871-
// a componentize-py.toml file, we'll ignore the second one just as Python will ignore the
872-
// second module.
873-
874-
if modules_seen.contains(&module) {
875-
bail!("multiple `componentize-py.toml` files found in module `{module}`");
876-
}
877-
878-
modules_seen.insert(module.clone());
879-
880-
if module == existing.module {
881-
push = false;
882-
break;
883-
}
884-
}
885-
}
886-
887-
if push {
888-
configs.push(ConfigContext {
889-
module,
890-
root: root.to_owned(),
891-
path: path.parent().unwrap().to_owned(),
892-
config: toml::from_str::<RawComponentizePyConfig>(&fs::read_to_string(path)?)?,
893-
});
894-
}
895-
}
896-
}
897-
898-
Ok(())
899-
}
900-
901-
fn module_name(root: &Path, path: &Path) -> Option<String> {
902-
if let [first, _, ..] = &path.strip_prefix(root).ok()?.iter().collect::<Vec<_>>()[..] {
903-
first.to_str().map(|s| s.to_owned())
904-
} else {
905-
None
906-
}
907-
}
908-
909676
fn add_wasi_imports<'a>(
910677
module: &'a [u8],
911678
imports: &mut HashMap<&'a str, HashMap<&'a str, FuncType>>,

0 commit comments

Comments
 (0)