Skip to content

Commit 0e6cf53

Browse files
author
Phil Cummins
committed
hmm
1 parent c189e6a commit 0e6cf53

File tree

3 files changed

+156
-146
lines changed

3 files changed

+156
-146
lines changed

CONTRIBUTING.md

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,16 @@ switch.
2121
### Prerequisites
2222

2323
- Tools needed to build [CPython](https://github.com/python/cpython) (Make, Clang, etc.)
24-
- [Rust](https://rustup.rs/) stable 1.81.0 or later, including the `wasm32-wasip1` and `wasm32-unknown-unknown` targets
25-
- Note that we currently use the `-Z build-std` Cargo option to build the `componentize-py` runtime with position-independent code (which is not the default for `wasm32-wasip1`) and this requires using a recent nightly build of Rust.
24+
- [Rust](https://rustup.rs/) stable 1.71 or later *and* nightly 2023-07-27 or later, including the `wasm32-wasi` and `wasm32-unknown-unknown` targets
25+
- Note that we currently use the `-Z build-std` Cargo option to build the `componentize-py` runtime with position-independent code (which is not the default for `wasm32-wasi`) and this requires using a recent nightly build of Rust.
2626

2727
For Rust, something like this should work once you have `rustup`:
2828

2929
```shell
3030
rustup update
31-
rustup target add wasm32-wasip1 wasm32-unknown-unknown
31+
rustup install nightly
32+
rustup component add rust-src --toolchain nightly
33+
rustup target add wasm32-wasi wasm32-unknown-unknown
3234
```
3335

3436
### Building and Running

src/lib.rs

Lines changed: 4 additions & 139 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ use {
1111
prelink::{embedded_helper_utils, embedded_python_standard_library},
1212
serde::Deserialize,
1313
std::{
14-
collections::{HashMap, HashSet},
14+
collections::HashMap,
1515
env, fs,
1616
io::Cursor,
1717
iter,
@@ -49,7 +49,6 @@ mod summary;
4949
mod test;
5050
mod util;
5151

52-
static NATIVE_EXTENSION_SUFFIX: &str = ".cpython-312-wasm32-wasi.so";
5352

5453
wasmtime::component::bindgen!({
5554
path: "wit",
@@ -213,73 +212,14 @@ pub async fn componentize(
213212
.filter_map(|&s| Path::new(s).exists().then_some(s))
214213
.collect::<Vec<_>>();
215214

216-
let embedded_python_standard_lib = embedded_python_standard_library().unwrap();
217-
let embedded_helper_utils = embedded_helper_utils().unwrap();
218-
219-
// Search `python_path` for native extension libraries and/or componentize-py.toml files. Packages containing
220-
// the latter may contain their own WIT files defining their own worlds (in addition to what the caller
221-
// specified as parameters), which we'll try to match up with `module_worlds` in the next step.
222-
let mut raw_configs: Vec<crate::ConfigContext<crate::RawComponentizePyConfig>> = Vec::new();
223-
let mut library_path: Vec<(&str, Vec<std::path::PathBuf>)> =
224-
Vec::with_capacity(python_path.len());
225-
for path in python_path {
226-
let mut libraries = Vec::new();
227-
search_directory(
228-
Path::new(path),
229-
Path::new(path),
230-
&mut libraries,
231-
&mut raw_configs,
232-
&mut HashSet::new(),
233-
)?;
234-
library_path.push((*path, libraries));
235-
}
236-
237-
let mut libraries = prelink::bundle_libraries(library_path)?;
238-
239-
// Validate the paths parsed from any componentize-py.toml files discovered above and match them up with
240-
// `module_worlds` entries. Note that we use an `IndexMap` to preserve the order specified in `module_worlds`,
241-
// which is required to be topologically sorted with respect to package dependencies.
242-
//
243-
// For any packages which contain componentize-py.toml files but no corresponding `module_worlds` entry, we use
244-
// the `world` parameter as a default.
245-
let configs = {
246-
let mut configs = raw_configs
247-
.into_iter()
248-
.map(|raw_config| {
249-
let config =
250-
ComponentizePyConfig::try_from((raw_config.path.deref(), raw_config.config))?;
251-
252-
Ok((
253-
raw_config.module.clone(),
254-
ConfigContext {
255-
module: raw_config.module,
256-
root: raw_config.root,
257-
path: raw_config.path,
258-
config,
259-
},
260-
))
261-
})
262-
.collect::<Result<HashMap<_, _>>>()?;
263-
264-
let mut ordered = IndexMap::new();
265-
for (module, world) in module_worlds {
266-
if let Some(config) = configs.remove(*module) {
267-
ordered.insert((*module).to_owned(), (config, Some(*world)));
268-
} else {
269-
bail!("no `componentize-py.toml` file found for module `{module}`");
270-
}
271-
}
215+
let embedded_python_standard_lib = embedded_python_standard_library()?;
216+
let embedded_helper_utils = embedded_helper_utils()?;
272217

273-
for (module, config) in configs {
274-
ordered.insert(module, (config, world));
275-
}
218+
let (configs, mut libraries) = prelink::search_for_libraries_and_configs(python_path, module_worlds)?;
276219

277-
ordered
278-
};
279220

280221
// Next, iterate over all the WIT directories, merging them into a single `Resolve`, and matching Python
281222
// packages to `WorldId`s.
282-
283223
let (mut resolve, mut main_world) = if let Some(path) = wit_path {
284224
let (resolve, world) = parse_wit(path, world)?;
285225
(Some(resolve), Some(world))
@@ -735,83 +675,8 @@ fn add_wasi_and_stubs(
735675
Ok(())
736676
}
737677

738-
fn search_directory(
739-
root: &Path,
740-
path: &Path,
741-
libraries: &mut Vec<PathBuf>,
742-
configs: &mut Vec<ConfigContext<RawComponentizePyConfig>>,
743-
modules_seen: &mut HashSet<String>,
744-
) -> Result<()> {
745-
if path.is_dir() {
746-
for entry in fs::read_dir(path)? {
747-
search_directory(root, &entry?.path(), libraries, configs, modules_seen)?;
748-
}
749-
} else if let Some(name) = path.file_name().and_then(|name| name.to_str()) {
750-
if name.ends_with(NATIVE_EXTENSION_SUFFIX) {
751-
libraries.push(path.to_owned());
752-
} else if name == "componentize-py.toml" {
753-
let root = root.canonicalize()?;
754-
let path = path.canonicalize()?;
755-
756-
let module = module_name(&root, &path)
757-
.ok_or_else(|| anyhow!("unable to determine module name for {}", path.display()))?;
758-
759-
let mut push = true;
760-
for existing in &mut *configs {
761-
if path == existing.path.join("componentize-py.toml") {
762-
// When one directory in `PYTHON_PATH` is a subdirectory of the other, we consider the
763-
// subdirectory to be the true owner of the file. This is important later, when we derive a
764-
// package name by stripping the root directory from the file path.
765-
if root > existing.root {
766-
module.clone_into(&mut existing.module);
767-
root.clone_into(&mut existing.root);
768-
path.parent().unwrap().clone_into(&mut existing.path);
769-
}
770-
push = false;
771-
break;
772-
} else {
773-
// If we find a componentize-py.toml file under a Python module which will not be used because
774-
// we already found a version of that module in an earlier `PYTHON_PATH` directory, we'll
775-
// ignore the latest one.
776-
//
777-
// For example, if the module `foo_sdk` appears twice in `PYTHON_PATH`, and both versions have
778-
// a componentize-py.toml file, we'll ignore the second one just as Python will ignore the
779-
// second module.
780-
781-
if modules_seen.contains(&module) {
782-
bail!("multiple `componentize-py.toml` files found in module `{module}`");
783-
}
784678

785-
modules_seen.insert(module.clone());
786679

787-
if module == existing.module {
788-
push = false;
789-
break;
790-
}
791-
}
792-
}
793-
794-
if push {
795-
configs.push(ConfigContext {
796-
module,
797-
root: root.to_owned(),
798-
path: path.parent().unwrap().to_owned(),
799-
config: toml::from_str::<RawComponentizePyConfig>(&fs::read_to_string(path)?)?,
800-
});
801-
}
802-
}
803-
}
804-
805-
Ok(())
806-
}
807-
808-
fn module_name(root: &Path, path: &Path) -> Option<String> {
809-
if let [first, _, ..] = &path.strip_prefix(root).ok()?.iter().collect::<Vec<_>>()[..] {
810-
first.to_str().map(|s| s.to_owned())
811-
} else {
812-
None
813-
}
814-
}
815680

816681
fn add_wasi_imports<'a>(
817682
module: &'a [u8],

src/prelink.rs

Lines changed: 147 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,18 @@
11
#![deny(warnings)]
22

33
use std::{
4-
fs::{self},
5-
io::{self, Cursor},
4+
collections::{HashMap, HashSet}, fs::{self}, io::{self, Cursor}, ops::Deref, path::{Path, PathBuf}
65
};
76

8-
use anyhow::Context;
7+
use anyhow::{anyhow, bail, Context, Result};
8+
use indexmap::IndexMap;
99
use tar::Archive;
1010
use tempfile::TempDir;
1111
use zstd::Decoder;
1212

13-
use crate::Library;
13+
use crate::{ComponentizePyConfig, ConfigContext, Library, RawComponentizePyConfig};
14+
15+
static NATIVE_EXTENSION_SUFFIX: &str = ".cpython-312-wasm32-wasi.so";
1416

1517
pub fn embedded_python_standard_library() -> Result<TempDir, io::Error> {
1618
// Untar the embedded copy of the Python standard library into a temporary directory
@@ -87,3 +89,144 @@ fn library_from_so(library_name: &str) -> Result<Library, io::Error> {
8789
dl_openable: false,
8890
})
8991
}
92+
93+
pub fn search_for_libraries_and_configs<'a>(python_path: &'a Vec<&'a str>, module_worlds: &'a [(&'a str, &'a str)])
94+
-> Result<(IndexMap<String, (ConfigContext<ComponentizePyConfig>, Option<&'a str>)>, Vec<Library>), anyhow::Error> {
95+
let mut raw_configs: Vec<crate::ConfigContext<crate::RawComponentizePyConfig>> = Vec::new();
96+
let mut library_path: Vec<(&str, Vec<std::path::PathBuf>)> =
97+
Vec::with_capacity(python_path.len());
98+
for path in python_path {
99+
let mut libraries = Vec::new();
100+
search_directory(
101+
Path::new(path),
102+
Path::new(path),
103+
&mut libraries,
104+
&mut raw_configs,
105+
&mut HashSet::new(),
106+
)?;
107+
library_path.push((*path, libraries));
108+
}
109+
110+
let mut libraries = bundle_libraries(library_path)?;
111+
112+
// Validate the paths parsed from any componentize-py.toml files discovered above and match them up with
113+
// `module_worlds` entries. Note that we use an `IndexMap` to preserve the order specified in `module_worlds`,
114+
// which is required to be topologically sorted with respect to package dependencies.
115+
//
116+
// For any packages which contain componentize-py.toml files but no corresponding `module_worlds` entry, we use
117+
// the `world` parameter as a default.
118+
let configs: IndexMap<String, (ConfigContext<ComponentizePyConfig>, Option<&str>)> = {
119+
let mut configs = raw_configs
120+
.into_iter()
121+
.map(|raw_config| {
122+
let config =
123+
ComponentizePyConfig::try_from((raw_config.path.deref(), raw_config.config))?;
124+
125+
Ok((
126+
raw_config.module.clone(),
127+
ConfigContext {
128+
module: raw_config.module,
129+
root: raw_config.root,
130+
path: raw_config.path,
131+
config,
132+
},
133+
))
134+
})
135+
.collect::<Result<HashMap<_, _>>>()?;
136+
137+
let mut ordered = IndexMap::new();
138+
for (module, world) in module_worlds {
139+
if let Some(config) = configs.remove(*module) {
140+
ordered.insert((*module).to_owned(), (config, Some(*world)));
141+
} else {
142+
bail!("no `componentize-py.toml` file found for module `{module}`");
143+
}
144+
}
145+
146+
for (module, config) in configs {
147+
ordered.insert(module, (config, world));
148+
}
149+
150+
ordered
151+
};
152+
153+
Ok((configs, libraries))
154+
}
155+
156+
fn search_directory(
157+
root: &Path,
158+
path: &Path,
159+
libraries: &mut Vec<PathBuf>,
160+
configs: &mut Vec<ConfigContext<RawComponentizePyConfig>>,
161+
modules_seen: &mut HashSet<String>,
162+
) -> Result<(), anyhow::Error> {
163+
if path.is_dir() {
164+
for entry in fs::read_dir(path)? {
165+
search_directory(root, &entry?.path(), libraries, configs, modules_seen)?;
166+
}
167+
} else if let Some(name) = path.file_name().and_then(|name| name.to_str()) {
168+
if name.ends_with(NATIVE_EXTENSION_SUFFIX) {
169+
libraries.push(path.to_owned());
170+
} else if name == "componentize-py.toml" {
171+
let root = root.canonicalize()?;
172+
let path = path.canonicalize()?;
173+
174+
let module = module_name(&root, &path)
175+
.ok_or_else(|| anyhow!("unable to determine module name for {}", path.display()))?;
176+
177+
let mut push = true;
178+
for existing in &mut *configs {
179+
if path == existing.path.join("componentize-py.toml") {
180+
// When one directory in `PYTHON_PATH` is a subdirectory of the other, we consider the
181+
// subdirectory to be the true owner of the file. This is important later, when we derive a
182+
// package name by stripping the root directory from the file path.
183+
if root > existing.root {
184+
module.clone_into(&mut existing.module);
185+
root.clone_into(&mut existing.root);
186+
path.parent().unwrap().clone_into(&mut existing.path);
187+
}
188+
push = false;
189+
break;
190+
} else {
191+
// If we find a componentize-py.toml file under a Python module which will not be used because
192+
// we already found a version of that module in an earlier `PYTHON_PATH` directory, we'll
193+
// ignore the latest one.
194+
//
195+
// For example, if the module `foo_sdk` appears twice in `PYTHON_PATH`, and both versions have
196+
// a componentize-py.toml file, we'll ignore the second one just as Python will ignore the
197+
// second module.
198+
199+
if modules_seen.contains(&module) {
200+
bail!("multiple `componentize-py.toml` files found in module `{module}`");
201+
}
202+
203+
modules_seen.insert(module.clone());
204+
205+
if module == existing.module {
206+
push = false;
207+
break;
208+
}
209+
}
210+
}
211+
212+
if push {
213+
configs.push(ConfigContext {
214+
module,
215+
root: root.to_owned(),
216+
path: path.parent().unwrap().to_owned(),
217+
config: toml::from_str::<RawComponentizePyConfig>(&fs::read_to_string(path)?)?,
218+
});
219+
}
220+
}
221+
}
222+
223+
Ok(())
224+
}
225+
226+
fn module_name(root: &Path, path: &Path) -> Option<String> {
227+
if let [first, _, ..] = &path.strip_prefix(root).ok()?.iter().collect::<Vec<_>>()[..] {
228+
first.to_str().map(|s| s.to_owned())
229+
} else {
230+
None
231+
}
232+
}

0 commit comments

Comments
 (0)