diff --git a/public/content/developers/docs/data-and-analytics/data-flow/1-client-nodes.png b/public/content/developers/docs/data-and-analytics/data-flow/1-client-nodes.png new file mode 100644 index 00000000000..ac807c2af27 Binary files /dev/null and b/public/content/developers/docs/data-and-analytics/data-flow/1-client-nodes.png differ diff --git a/public/content/developers/docs/data-and-analytics/data-flow/10-other-onchain.png b/public/content/developers/docs/data-and-analytics/data-flow/10-other-onchain.png new file mode 100644 index 00000000000..39740d1e9f4 Binary files /dev/null and b/public/content/developers/docs/data-and-analytics/data-flow/10-other-onchain.png differ diff --git a/public/content/developers/docs/data-and-analytics/data-flow/2-request-pipelines.png b/public/content/developers/docs/data-and-analytics/data-flow/2-request-pipelines.png new file mode 100644 index 00000000000..52c9bb1c614 Binary files /dev/null and b/public/content/developers/docs/data-and-analytics/data-flow/2-request-pipelines.png differ diff --git a/public/content/developers/docs/data-and-analytics/data-flow/3-raw-tables.png b/public/content/developers/docs/data-and-analytics/data-flow/3-raw-tables.png new file mode 100644 index 00000000000..e5c44c707cd Binary files /dev/null and b/public/content/developers/docs/data-and-analytics/data-flow/3-raw-tables.png differ diff --git a/public/content/developers/docs/data-and-analytics/data-flow/4-decoding.png b/public/content/developers/docs/data-and-analytics/data-flow/4-decoding.png new file mode 100644 index 00000000000..9847e3bfaf3 Binary files /dev/null and b/public/content/developers/docs/data-and-analytics/data-flow/4-decoding.png differ diff --git a/public/content/developers/docs/data-and-analytics/data-flow/5-decoded-data.png b/public/content/developers/docs/data-and-analytics/data-flow/5-decoded-data.png new file mode 100644 index 00000000000..8fd03807339 Binary files /dev/null and b/public/content/developers/docs/data-and-analytics/data-flow/5-decoded-data.png differ diff --git a/public/content/developers/docs/data-and-analytics/data-flow/6-transformed-data.png b/public/content/developers/docs/data-and-analytics/data-flow/6-transformed-data.png new file mode 100644 index 00000000000..e4171298c43 Binary files /dev/null and b/public/content/developers/docs/data-and-analytics/data-flow/6-transformed-data.png differ diff --git a/public/content/developers/docs/data-and-analytics/data-flow/7-aggregations.png b/public/content/developers/docs/data-and-analytics/data-flow/7-aggregations.png new file mode 100644 index 00000000000..c4513dff75a Binary files /dev/null and b/public/content/developers/docs/data-and-analytics/data-flow/7-aggregations.png differ diff --git a/public/content/developers/docs/data-and-analytics/data-flow/8-offchain-data.png b/public/content/developers/docs/data-and-analytics/data-flow/8-offchain-data.png new file mode 100644 index 00000000000..0de2feab531 Binary files /dev/null and b/public/content/developers/docs/data-and-analytics/data-flow/8-offchain-data.png differ diff --git a/public/content/developers/docs/data-and-analytics/data-flow/crypto-data-flow.png b/public/content/developers/docs/data-and-analytics/data-flow/crypto-data-flow.png new file mode 100644 index 00000000000..e06ad9b519e Binary files /dev/null and b/public/content/developers/docs/data-and-analytics/data-flow/crypto-data-flow.png differ diff --git a/public/content/developers/docs/data-and-analytics/data-flow/decoded-log.png 
b/public/content/developers/docs/data-and-analytics/data-flow/decoded-log.png
new file mode 100644
index 00000000000..8e7b50a4afe
Binary files /dev/null and b/public/content/developers/docs/data-and-analytics/data-flow/decoded-log.png differ
diff --git a/public/content/developers/docs/data-and-analytics/data-flow/engine.png b/public/content/developers/docs/data-and-analytics/data-flow/engine.png
new file mode 100644
index 00000000000..109b415e86d
Binary files /dev/null and b/public/content/developers/docs/data-and-analytics/data-flow/engine.png differ
diff --git a/public/content/developers/docs/data-and-analytics/data-flow/index.md b/public/content/developers/docs/data-and-analytics/data-flow/index.md
new file mode 100644
index 00000000000..bc153fa5c9a
--- /dev/null
+++ b/public/content/developers/docs/data-and-analytics/data-flow/index.md
@@ -0,0 +1,201 @@
+---
+title: Understanding Data Flow
+description: From the initial data request to the final transformation of data into useful metrics. Data flow covers the retrieval, decoding, transformation, and utilization of raw on-chain data to build blockchain applications.
+lang: en
+sidebarDepth: 3
+---
+
+Understanding where crypto data originates is crucial for anyone involved in the blockchain space. This comprehensive breakdown will guide you through each step of the process, from the initial data request to the final transformation of data into a meaningful metric.
+
+In the first post of this series, we will explore how client nodes initiate data retrieval, the role of request pipelines, and the nature of raw on-chain data. You will also learn about the critical process of decoding raw data, transforming it into human-readable formats, and the subsequent aggregation of data for various use cases. We'll also touch on integrating off-chain sources and more complex on-chain schemas to provide a holistic view of data transformations.
+
+We won't go too deep into every piece of information that can be retrieved by interacting with the blockchain. Rather than exhaustively detailing every possible data point and schema retrievable from the blockchain, this guide offers a broad overview of the essential business logic needed to derive ecosystem-level data, such as the overall volume of DEXes. Whether you're a developer, data analyst, or researcher, this detailed guide will enhance your understanding of the crypto data landscape and its applications.
+
+![Crypto Data Flow](./crypto-data-flow.png)
+*Crypto Data Flow*
+
+Not every data transformation pipeline follows clear steps like those outlined above. Leaner processes, or those with well-defined goals, often skip, blur, or merge steps together, adapting to the unique needs of the task. This article explores a process tailored for a data provider, one that enables data creation for any protocol or network, similar to platforms like Dune.
+
+## 1 - Client Nodes {#client-nodes}
+
+![Client Nodes](./1-client-nodes.png)
+
+The process of fetching data from the blockchain starts with a request to a client node. "Nodes" and "clients" are terms used to describe core software infrastructure that allows you to both read (request data) and write (submit transactions). For this purpose, every client implements a [JSON-RPC specification](/developers/docs/apis/json-rpc/), providing a uniform set of methods that applications can rely on, regardless of the specific node or client implementation.
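+
+As a rough illustration of that interface, the snippet below sends two standard JSON-RPC calls over plain HTTP; the endpoint URL is a placeholder, and any client you run yourself or any node-as-a-service URL exposing the standard API would behave the same way.
+
+```python
+import requests
+
+# Placeholder endpoint: any client or provider exposing the standard JSON-RPC API works here
+rpc_url = "https://your-node-or-provider.example"
+
+def rpc(method, params=None):
+    # Every client answers the same uniform set of methods defined by the JSON-RPC specification
+    body = {"jsonrpc": "2.0", "id": 1, "method": method, "params": params or []}
+    return requests.post(rpc_url, json=body).json()["result"]
+
+latest = rpc("eth_blockNumber")                       # hex-encoded block number, e.g. "0x1432c64"
+block = rpc("eth_getBlockByNumber", [latest, False])  # block header plus transaction hashes only
+print(int(latest, 16), len(block["transactions"]))
+```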
+
+Every chain has its own client specification, and if you want data from multiple chains, you must run multiple clients, one for each network. You're not required to run your own node, but if you don't, you will need a communication channel with whoever runs it for you, such as a node-as-a-service provider.
+
+**Node examples:** Geth, Lighthouse, Prysm, Erigon.
+**Node-as-a-service examples:** Alchemy, Infura, Ankr, QuickNode, Tenderly
+
+## 2 - Request Pipelines {#request-pipelines}
+
+![Request Pipelines](./2-request-pipelines.png)
+
+Retrieving blocks, transactions, or events through an interface, such as a JSON-RPC request, is a common requirement for accessing on-chain data. Nodes can provide both historical and current network states. However, interacting with the node API is a complex endeavor, requiring a structured approach to manage the information flow. A robust request pipeline must be established to handle incoming requests and efficiently transform the raw data into organized tables.
+
+To simplify the challenges associated with request pipelines, data extraction tools have emerged as valuable assets in blockchain data management. Tools like [Cryo](https://github.com/paradigmxyz/cryo) provide an efficient interface for extracting blockchain data such as blocks, transactions, logs, and traces, utilizing JSON-RPC protocols and offering outputs in formats like Parquet and CSV for further analysis. Similarly, the EVM Query Language ([EQL](https://github.com/iankressin/eql)) offers a SQL-like syntax for querying Ethereum and EVM-compatible blockchains, making it easier for developers and analysts to retrieve data using familiar concepts from relational databases while also offering extraction to various file formats. These tools streamline communication with nodes and reduce the need to build complex custom pipelines, fitting seamlessly into ELT workflows and enhancing the efficiency of broader data transformation processes.
+
+## 3 - Raw tables (Onchain Data) {#raw-tables}
+
+![Raw tables](./3-raw-tables.png)
+
+**What is it?** Raw on-chain data encompasses anything you can extract through an RPC call from a node. While the full list of available data is extensive, in this post, we've narrowed it down to the most common types that follow standard schemas, making them easier to transform later.
+
+Note: Raw in this context only means the opposite of decoded, explained below.
+
+**Examples:** Blocks, Transactions, Accounts, Raw Traces, Raw Logs
+**Use Cases:**
+- Network level metrics
+- Source for Decoded Data
+
+**Metrics obtained by transforming raw onchain data:**
+- TPS - transactions per second
+- Gas per transaction
+- Top contracts called
+- Distinct new accounts
+- Daily unique transaction signers
+- Block mean time
+- Transaction size
+
+
+## 4 - Decoding {#decoding}
+
+![Decoding](./4-decoding.png)
+
+If you explore raw on-chain data, you'll notice that the data in function inputs, outputs, and logs doesn't look like what you see on Etherscan. The main challenge is the lack of context—what each field or value represents—and the fact that everything is stored in bytes (hexadecimal format).
+
+![Transaction Receipt](./transaction-receipt.png)
+*Example of a raw transaction receipt.*
+
+Decoding is the process of translating the data in raw events and traces into a human-readable format (with function and parameter names) using the contract ABI. This can only be achieved if you have access to the contract ABI or the original Solidity code.
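+
+As a minimal sketch of what decoding does, the snippet below manually decodes a raw ERC-20 `Transfer` log with the `eth_abi` library; the topics and data shown are from a sample Transfer log, and in practice a library such as web3.py performs this step for you directly from the full ABI.
+
+```python
+from eth_abi import decode
+
+# One raw log entry as returned by the node: hex-encoded values with no names attached
+topics = [
+    "0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef",  # keccak("Transfer(address,address,uint256)")
+    "0x00000000000000000000000037918a209697488c3e4c25a8a7e7e21e5532adfb",  # indexed parameter: from
+    "0x000000000000000000000000dd1693bd8e307ecfdbe51d246562fc4109f871f8",  # indexed parameter: to
+]
+data = "0x000000000000000000000000000000000000000000000030ca024f987b900000"  # non-indexed parameter: value
+
+# The ABI is what tells us these fields are (address from, address to, uint256 value)
+sender = decode(["address"], bytes.fromhex(topics[1][2:]))[0]
+receiver = decode(["address"], bytes.fromhex(topics[2][2:]))[0]
+value = decode(["uint256"], bytes.fromhex(data[2:]))[0]
+print(sender, receiver, value)  # two readable addresses and a token amount in base units
+```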
+
+![Decoded Log](./decoded-log.png)
+*The above transaction log, translated into a readable event. It was a Deposit of 4×10^17 wad to the WETH contract, with the [`0x82273BFbAcF1C07c4EEa6b1460F8A36479dE648c`](https://etherscan.io/address/0x82273BFbAcF1C07c4EEa6b1460F8A36479dE648c) account as the destination.*
+
+
+The reason this transformation is necessary is that smart contracts are stored on-chain as opcodes—a low-level, EVM-readable set of instructions—rather than as Solidity. Solidity is a high-level language that compiles into opcodes, which is what the nodes execute. Nodes are unaware of the original Solidity code that generated the opcodes, resulting in a lack of information about function names, parameter names, and output meanings.
+
+Access to the contract's code is only possible if someone with the deployed code makes it public, typically by submitting it to a database or a service like Etherscan. With the Solidity code, you can compile it, compare it to the on-chain opcodes, and verify its authenticity. Alternatively, you can use the [contract ABI](https://docs.soliditylang.org/en/latest/abi-spec.html), which is generated from Solidity at the compilation step. The ABI maps function and event signatures to their actual names and includes the encoding details of parameters, such as word size and variable types. However, it is important to note that not all contract codes and ABIs are publicly available, which means that not all contracts and transactions can be decoded.
+
+## 5 - Decoded Data {#decoded-data}
+
+![Decoded Data](./5-decoded-data.png)
+
+**What is it?** Decoded data includes logs, traces, transfers, and view functions translated into human-readable parameters. In a data flow, some form of decoding is almost always performed, even for less common data types.
+
+**Examples:** Decoded Traces, Decoded Logs, ERC20 Transfer Events
+**Use Cases:**
+- Primary source for all protocol level metrics
+- Primary source for Transformed Data
+
+**Metrics obtained by transforming decoded data:**
+- Aave deposits and withdrawals
+- Daily Liquidations
+- Protocol TVL
+- DAU
+- Bridge inflows and outflows
+- Volume and Open Interest
+- Protocol Revenue
+- Beacon Chain deposit contract deposits
+
+Having all decoded traces, logs, and ERC20 transfer events extracted and loaded into tables is the most convenient method for establishing an ELT (Extract, Load, Transform) process, allowing for the derivation of a wide range of protocol data.
+
+## Transformation Engine {#transformation-engine}
+
+![Transformation Engine](./engine.png)
+
+Now that you have all this data available, a transformation infrastructure is needed to process and convert it into meaningful metrics. Transformation engines can vary widely from company to company; they can range from something as simple as a Python notebook to a complex data warehouse infrastructure (e.g., Dagster/Airflow, dbt, Snowflake/Databricks).
+
+The transformation process can take in raw or decoded data, along with other on-chain sources, and combine it with prices or other off-chain data to produce metric outputs. In the flow diagram, you'll notice multiple arrows directing data into the transformation engine. After data is transformed, the output can be saved and reused—either in whole or in part—for further transformations. Data processing is essentially an endless cycle of reshaping and refining previous outputs to extract new insights.
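+
+As a toy illustration of what such an engine does, the sketch below reshapes a handful of already-decoded transfer rows into a daily-volume metric with pandas; the rows and the price are made up, and a real pipeline would read them from the decoded tables and off-chain sources described in this article.
+
+```python
+import pandas as pd
+
+# A few already-decoded ERC-20 transfers (illustrative rows, not real data)
+decoded_transfers = pd.DataFrame([
+    {"day": "2024-05-01", "token": "WETH", "amount_raw": 4 * 10**17},
+    {"day": "2024-05-01", "token": "WETH", "amount_raw": 12 * 10**17},
+    {"day": "2024-05-02", "token": "WETH", "amount_raw": 9 * 10**17},
+])
+
+# Off-chain input: a token price from a price provider (hypothetical value)
+weth_price_usd = 3000.0
+
+# Transform: scale by decimals, aggregate per day, and combine with the off-chain price
+decoded_transfers["amount"] = decoded_transfers["amount_raw"] / 10**18
+daily_volume = (
+    decoded_transfers.groupby(["day", "token"], as_index=False)["amount"]
+    .sum()
+    .assign(volume_usd=lambda df: df["amount"] * weth_price_usd)
+)
+print(daily_volume)  # a small "transformed data" table, ready to be saved and reused
+```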
+ +## 6 - Transformed Data {#transformed-data} + +![Transformed Data](./6-transformed-data.png) + +**What is it?** Transformed data consists of business-level metrics or aggregates derived from all the sources available, including raw and decoded. After collecting all the raw and decoded information, the next step is to manipulate and combine it to create meaningful metrics. Transformed data is the output of any data transformation process, and it's often stored for future use to avoid recomputation. By transforming data once and reusing it whenever necessary, you can optimize efficiency and performance. + +**Examples:** Uniswap SWAPs, Aave TVL, DEX.trades + +**Use Cases:** +- Feed the analytics layer (analytics UI, dashboards, charts) +- Source for other Transformed Data (reprocess) +- Source for aggregations + +## 7 - Aggregations {#aggregations} + +![Aggregations](./7-aggregations.png) + +Users often seek ecosystem-level data, such as answers to questions like “How much volume are DEXes generating?”, “What is the total value locked (TVL) in DeFi?”, or “How many users do crypto social platforms have?”. These kinds of questions rarely have shortcuts; instead, they typically require a step-by-step "brute force" approach to generate meaningful answers. This involves creating metrics for a single protocol (e.g., Uniswap), then aggregating metrics across multiple protocols within a network (e.g., Ethereum DEXes), and finally aggregating these metrics across multiple chains to reflect the broader market (e.g., the entire DEX ecosystem). + +**Example Metrics:** +- Aggregate metrics for one protocol (Uniswap Metrics) +- Aggregate metrics for multiple protocols into a Network Metric (Ethereum DEXes Metrics) +- Aggregate metrics from multiple chains into market metrics (DEXes ecosystem Metrics) + +## 8 - Offchain Data {#offchain-data} + +![Offchain Data](./8-offchain-data.png) + +**What is it?** Off-chain data refers to any data that doesn’t originate from a blockchain node. Blockchain metrics can be significantly enhanced by incorporating data that isn't available onchain, typically sourced from external data providers. Centralized exchanges (CEXes) are among the most common sources, especially when converting metrics into dollar terms, as they provide the necessary price information for tokens at specific times. Additionally, some data may reside in more centralized or semi-decentralized databases. Ingesting this data directly into your system can serve as a shortcut to the Extract-Translate-Transform process. + +**Examples:** +- Token prices from centralized exchanges +- NFT collections metadata +- Data from “appchains” +- Governance proposals from platforms like Snapshot +- Social APIs (e.g., Lens and Farcaster) +- MEV (Maximal Extractable Value) data + +**Example Metrics:** +- Converting token amounts and volume to USD terms. +- Enriching protocol data where activity occurs off-chain +- Sourcing data from pseudo-chains or other sources not directly supported on-chain + +## 9 - Prices (offchain data) {#prices} +Prices play such a crucial role in the crypto data flow that they warrant a dedicated explanation. There is no single, definitive source of truth for prices in the crypto world. Each exchange and blockchain can have different prices for the same trading pair at any given time. 
To address this, the prices table aims to aggregate and time-weight average (TWAP) these prices from multiple sources, including both centralized exchanges (CEXes) and decentralized exchanges (DEXes), into a single, representative value for a given time.
+
+The process is not trivial; it involves inputting known prices, calculating volumes, removing outliers, and filtering out lagged or less representative markets. By carefully handling these steps, the process ensures a more accurate and reliable pricing metric that reflects the broader market dynamics.
+
+## 10 - Other On-Chain Data {#other-onchain}
+
+![Other On-Chain Data](./10-other-onchain.png)
+
+**What is it?** This refers to pre-indexed data extracted from blockchains that is less standardized and more specific. Blockchains don't just store blocks, transactions, and logs; they also contain a wide variety of other data types, which may require additional steps to extract and are often harder to standardize into table formats. While EVM-compatible chains usually have defined standards, more exotic chains (e.g., the Beacon Chain) also produce and store unique data. Depending on your application, your data needs might be entirely centered around these specific and less conventional data types, and the process might not be as well defined as the steps presented above.
+
+
+**Examples include:**
+- Mempool data for high-frequency trading or block building
+- Beacon Chain deposits and withdrawals for staking providers
+- Blob data for L2 sequencers
+- P2P communications
+
+Below are some limited examples, particularly relevant for network and protocol metrics:
+
+
+| Example | What is it? | Complexity | What makes it complex for an ELT process? | Use case |
+| ----- | ----- | ----- | ----- | ----- |
+| Token Metadata | Name, symbol, and decimals from tokens; extracted via view calls. | Easy | Identifying token contracts; handling mutable values. | Translating token addresses to names and symbols; calculating real amounts. |
+| ERC20 View Functions | `balanceOf` and `totalSupply` from ERC20; extracted via view calls. | Medium | When to trigger a call for indexing? Capturing all value changes (e.g., rebasing, contract construction)? | Tracking wallet balances over time; monitoring token supply changes. |
+| Contract-Specific View Functions | View functions that access state and output the relevant contract data. | Hard | What triggers a call for indexing? Which functions do people care about? | Create protocol metrics that were not surfaced in logs or traces. |
+| State Changes | State diffs at every transaction. | Hard | Decoding state changes; determining which storage slots to index. | Create protocol metrics that were not surfaced in logs or traces. |
+| Contract internal variables | Variables used during execution but not persisted in storage. | Hard | Requires runtime execution access; identifying relevant variables to index. | Create protocol metrics that were not surfaced in logs or traces. |
+| Shadow/Ghost Logs | Additional logs emitted by modified contract code. | Hard | Determining which shadow logs users want; rewriting the appropriate code. | Accessing internal states and variables, formatted as logs. |
+
+## 11 - View Function Calls (Other On-Chain Data) {#view-functions-call}
+In EVM-compatible blockchains, much of the data categorized as "Other On-Chain Data" can be accessed through the outputs of "view functions". View functions are a type of Solidity function that does not modify the blockchain state; instead, they simply read and transform existing state data. This programmatic method allows you to read the network state and return data for use by other functions in the code. Since view functions don't alter the state, they can be executed without incurring gas costs.
+
+You can also make external view function calls to a node (the "Read Contract" tab on Etherscan), as client nodes implement an API for executing these calls and returning the results. However, there is a limitation: regular client nodes do not store historical state data, so view functions can only return the latest state at the time of the call.
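+
+As a rough sketch of such an external call, the snippet below reads an ERC-20 `balanceOf` view function through a node with web3.py; the endpoint, token, and holder addresses are placeholders, and the historical call only works against an archive node.
+
+```python
+from web3 import Web3
+
+w3 = Web3(Web3.HTTPProvider("https://your-node-or-provider.example"))  # placeholder endpoint
+
+# Minimal ABI fragment for the ERC-20 balanceOf view function
+erc20_abi = [{
+    "name": "balanceOf", "type": "function", "stateMutability": "view",
+    "inputs": [{"name": "owner", "type": "address"}],
+    "outputs": [{"name": "", "type": "uint256"}],
+}]
+
+token = w3.eth.contract(address=Web3.to_checksum_address("0x0000000000000000000000000000000000000001"), abi=erc20_abi)  # placeholder token address
+holder = Web3.to_checksum_address("0x0000000000000000000000000000000000000002")  # placeholder account
+
+latest_balance = token.functions.balanceOf(holder).call()  # the node can always answer for the latest state
+past_balance = token.functions.balanceOf(holder).call(block_identifier=15_000_000)  # historical states need an archive node
+```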
+
+A naive pipeline designed to index view function outputs would only capture data from the current state at the time of the request. To gather historical data, you would need to re-sync the node from genesis and issue the requests as it syncs every time indexing is required, or use an archive node capable of returning historical states.
+
+The second challenge is that the output of a view function isn't always a direct representation of the state—it can also involve transformations. For example, the `balanceOf` function returns the token balance of an account. For standard tokens, the balance is stored as a state mapping, but for rebasing tokens, the balance is calculated as `amount_scale * rebase_index`, where `rebase_index` is a global variable that adjusts all account balances simultaneously. With a few thousand accounts rebasing at every block, indexing this data can quickly result in billions of rows. If, instead, you indexed `amount_scale` and `rebase_index`, you could calculate a user's balance whenever you need it, and it would represent much less data.
+
+This leads to the third challenge: determining when to index. You'll need to define triggers for indexing, such as every block, every transfer event, or whenever a specific call trace occurs. Since each application has unique requirements, this often necessitates custom pipelines.
+
+In conclusion, working with view functions can be quite complex, and designing an effective ELT process for storing view function outputs requires thoughtful planning and a strategic approach.
+
+
+## Conclusion {#conclusion}
+Understanding crypto data flows is crucial for anyone working in the blockchain space. This article has explored the key steps in the data journey—from retrieving raw on-chain data to transforming it into actionable metrics using client nodes, request pipelines, and transformation engines. We've covered the importance of decoding data, handling view function calls, and integrating off-chain information like prices, emphasizing the complexity and care required in each process.
+
+Effective management of crypto data flows allows for the creation of valuable insights and metrics, driving better decisions and innovations. As the blockchain landscape continues to evolve, mastering these data flows will be essential for staying ahead and making the most of blockchain's potential.
\ No newline at end of file diff --git a/public/content/developers/docs/data-and-analytics/data-flow/transaction-receipt.png b/public/content/developers/docs/data-and-analytics/data-flow/transaction-receipt.png new file mode 100644 index 00000000000..5a648197146 Binary files /dev/null and b/public/content/developers/docs/data-and-analytics/data-flow/transaction-receipt.png differ diff --git a/public/content/developers/docs/data-and-analytics/data-stacks/index.md b/public/content/developers/docs/data-and-analytics/data-stacks/index.md new file mode 100644 index 00000000000..967422f9ffd --- /dev/null +++ b/public/content/developers/docs/data-and-analytics/data-stacks/index.md @@ -0,0 +1,70 @@ +--- +title: Data Stacks +description: Explores three key approaches to handling blockchain data. Lean Stacks for efficient, goal-specific pipelines, Indexing Engines for standardized and reusable queries, and Data Lakes for storing and analyzing extensive datasets. +lang: en +sidebarDepth: 3 +--- + +Optimizing for a use case is paramount in the ever-evolving landscape of data processing and analytics. As organizations grapple with vast amounts of data, the need for streamlined, effective, and purpose-driven data management systems becomes increasingly critical. This post delves into three distinct approaches to data handling, each offering unique advantages and suited for different requirements. Together with outsourcing data extraction covered in the previous post, each of these data management systems will help you align your data workflows with your specific goals and requirements. + +**The Lean Stack** represents an optimized approach designed to deliver only the necessary metrics through streamlined request pipelines, decoding processes, and transformation engines. By minimizing unnecessary data extraction and processing, this approach ensures efficiency but is highly tailored to its initial design, making it less flexible for evolving needs. + +**Indexing Engines or Subgraphs** offer a standardized way to structure and query blockchain data, allowing for greater flexibility and reuse across multiple applications. By using predefined rules and user-defined scripts, these engines transform raw data into structured entities accessible through GraphQL APIs, making them ideal for applications requiring frequent and varied data queries. + +On the other hand, the **Data Lake** utilizes an ELT (Extract, Load, Transform) methodology, emphasizing the storage of vast quantities of raw data for subsequent analysis and transformation. This approach is characterized by its exploratory capabilities, allowing users to delve into extensive datasets, filter relevant information, and model data to derive meaningful insights. + +Each of these data management strategies serves unique needs, offering different strengths and trade-offs. This chapter delves into each approach's workings, benefits, and use cases, helping you understand how to align your data workflows with your specific goals and requirements. + +## The lean stack {#lean-stack} + +**What is it? \-** The Lean Stack is an approach where request pipelines, decoding processes, and transformation engines are meticulously designed to deliver only the required metrics, with no extra data processing or storage. It is the least flexible approach, as the stack is tailored specifically to process the data it was initially designed for and nothing more. + +Conceptually, the Lean Stack is highly customizable and can employ any language. 
It follows a classic ETL (Extract, Transform, Load) process, extracting only the necessary data without any excess. This approach eliminates the need for extensive data storage or processing beyond what is essential for the desired output metrics, making the system highly efficient and "lean".
+
+In this configuration, system processes often share the same infrastructure, blurring the lines between each step in the data flow (explored in part 1 of the series). For instance, when decoding is required, ABIs are provided as source data, and the same engine manages the translation of raw data into readable parameters and performs the necessary transformations, all in one integrated process. The Lean Stack can also utilize "Node-as-a-Service" solutions, interacting with NaaS APIs instead of making direct node RPC calls.
+
+Nevertheless, what we call a lean stack can be a very complex system. It can segment the workflow into distinct steps, deploying different solutions for each phase and storing intermediary data when necessary for optimization. However, its primary strength remains in its streamlined, goal-oriented design, focused solely on producing the required metrics with minimal overhead.
+
+## Indexing Engines (Subgraphs) {#subgraphs}
+
+**What is it?** Indexing engines provide a standardized way to manage data flows, enabling reuse across various outputs and applications. By predefining these flows, data processing can be outsourced to third parties as long as they implement the required stack.
+
+The primary advantage of using an indexing engine is outsourcing the operation to an "Indexer" who maintains the indexing service, keeping it online and operational. This approach is especially attractive for small teams, as it eliminates the need for any dedicated data infrastructure, similar to outsourcing with "Indexed Data Providers" (covered in [Outsourced Data Types](/developers/docs/data-and-analytics/outsourcing-data)). In this context, the stack refers to the system used by the indexer, rather than the system of the user of the final data.
+
+![Subgraphs](./subgraphs.png)
+
+Indexing engines have a similar flow to the "Indexed Data Providers". Still, instead of the provider defining the code that dictates the output, users of indexing engines create their own code scripts to define the desired output, adhering to specific subgraph guidelines. The processed data is then made accessible via a GraphQL API, as users typically do not manage the engines directly.
+
+A **Subgraph** is essentially a collection of code scripts designed to generate a specific output. It comprises several components that work together:
+
+* A **manifest** contains information on data sources, templates, and some subgraph metadata (e.g. description, repository, etc.). The manifest defines the contracts that are going to be indexed by a subgraph, the relevant events and function calls, and how to transform the raw data into entities that will be stored and later delivered to the user.
+* A **schema** defines the structure of the subgraph's output data and details how it can be queried using GraphQL. It includes entity definitions and specifies the structured data for each entity.
+* **AssemblyScript mappings** are scripts that execute when specified events occur, as detailed in the manifest. These mappings define event handlers that transform incoming data from events or function calls, subsequently storing the processed data in the Graph node store.
+
+**Examples:** The Graph, Alchemy, Sentio, Ponder.
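+
+To make the consumption side concrete, the sketch below queries a hypothetical subgraph endpoint over its GraphQL API with plain HTTP; the endpoint URL and the `swaps` entity are illustrative and depend entirely on how the subgraph's manifest and schema were defined.
+
+```python
+import requests
+
+# Hypothetical subgraph endpoint served by an indexer (placeholder URL)
+subgraph_url = "https://api.example.com/subgraphs/name/your-org/your-subgraph"
+
+# GraphQL query against an entity defined in the subgraph's schema (illustrative field names)
+query = """
+{
+  swaps(first: 5, orderBy: timestamp, orderDirection: desc) {
+    id
+    timestamp
+    amountUSD
+  }
+}
+"""
+
+response = requests.post(subgraph_url, json={"query": query})
+for swap in response.json()["data"]["swaps"]:
+    print(swap["id"], swap["amountUSD"])
+```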
+
+## Data Lake {#data-lake}
+
+**What is it?** The data lake stack uses an ELT (Extract, Load, Transform) process, differing from the ETL approach seen in the Lean Stack and Indexing Engines. This stack leverages a data warehouse to store large quantities of raw and decoded data, much of which may not be immediately used. It offers a clear separation between processes like request pipelines, decoding, storage, and transformations. Raw and decoded streamers can be used as outsourced sources, bypassing the need to handle the requesting and decoding processes in-house.
+
+Given that a data warehouse is used, SQL is typically the language of choice for data transformation. The process to generate metrics using a data lake stack includes the following steps, illustrated by the short sketch further below:
+
+* **Exploration:** Since the data lake contains more data than strictly necessary, the initial step is to explore and identify the contracts and events relevant to the desired output. With all data readily accessible, you can investigate the parameters directly, eliminating the need to consult project documentation.
+* **Filtering:** Data lakes often store data using broad, unified schemas for similar data types. Filtering involves narrowing down these extensive datasets to include only the specific contracts and functions your protocol requires. This step aims to minimize the dataset size to reduce computational costs, and filtering is performed on demand, solely for the data that will be used.
+* **Modeling:** This phase involves creating models for your protocol by utilizing data from various sources, including raw, decoded, off-chain, and custom-ingested data. Multiple models can be generated as needed, and saved transformations can be used later for aggregation, allowing for comprehensive and flexible data analysis.
+
+**Examples:** Allium, Flipside, Dune, Chainbase, Transpose
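+
+As a small illustration of the filter-and-model loop, the sketch below runs a SQL transformation over a toy table of decoded transfers using DuckDB; in a real data lake stack the same query would target warehouse tables holding raw and decoded data, and the library choice here is just one convenient stand-in.
+
+```python
+import duckdb
+import pandas as pd
+
+# Toy stand-in for a decoded-transfers table in the warehouse (illustrative rows)
+transfers = pd.DataFrame([
+    {"block_date": "2024-05-01", "token": "WETH", "amount": 0.4},
+    {"block_date": "2024-05-01", "token": "RAI", "amount": 900.0},
+    {"block_date": "2024-05-02", "token": "WETH", "amount": 1.2},
+])
+
+# Filter and model in SQL, the typical transformation language of a data lake stack
+daily_volume = duckdb.sql("""
+    SELECT block_date, token, SUM(amount) AS volume
+    FROM transfers
+    WHERE token = 'WETH'          -- filtering: keep only the contracts/assets you need
+    GROUP BY block_date, token    -- modeling: aggregate into a reusable metric table
+    ORDER BY block_date
+""").df()
+print(daily_volume)
+```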
+
+## Online SQL
+
+Lastly, we highlight Online SQL tools, which, while not a stack per se, have become crucial for analyzing and leveraging the vast amounts of data generated by blockchain networks.
+
+**What is it? \-** Online SQL tools give regular users the infrastructure for what would otherwise be the expensive and cumbersome process of running a data lake stack. These platforms enable users to analyze, transform, and create metrics from blockchain data as a back-end process and also offer additional features like visualization tools and dashboards to showcase insights.
+
+**How does it work? \-** Users interact with an online interface that provides access to an underlying data warehouse. Through SQL queries, users can transform and combine available data to derive meaningful metrics. The platform handles data ingestion and storage, with user-submitted queries often becoming part of the platform's intellectual property. These tools manage the materialization of some queries into transformed data tables, facilitating efficient access and analysis. Providers typically monetize through various channels, including selling API access, computation services, and access to premium data.
+
+**Examples:** Flipside, Dune, Chainbase, Transpose
+
+## Conclusion
+
+In the diverse world of crypto data, finding a pure stack that strictly adheres to just one of the types presented is rare. Most data providers and organizations employ a hybrid approach, leveraging a combination of these solutions to meet their specific needs. This often includes mixing different stacks and outsourcing various data processes to optimize for their business proposition, scalability, and cost-effectiveness. The flexibility to blend these methodologies allows data players to tailor their systems, ensuring they can adapt to the evolving demands of the blockchain landscape and deliver accurate, reliable insights across a range of applications.
\ No newline at end of file
diff --git a/public/content/developers/docs/data-and-analytics/data-stacks/subgraphs.png b/public/content/developers/docs/data-and-analytics/data-stacks/subgraphs.png
new file mode 100644
index 00000000000..57fffec8da5
Binary files /dev/null and b/public/content/developers/docs/data-and-analytics/data-stacks/subgraphs.png differ
diff --git a/public/content/developers/docs/data-and-analytics/extraction-tranformation-example/index.md b/public/content/developers/docs/data-and-analytics/extraction-tranformation-example/index.md
new file mode 100644
index 00000000000..2bdc49b4292
--- /dev/null
+++ b/public/content/developers/docs/data-and-analytics/extraction-tranformation-example/index.md
@@ -0,0 +1,213 @@
+---
+title: A Simple Extraction and Transformation Example
+description: A practical example of extracting and decoding Ethereum transaction data, highlighting the steps in the process. Perfect for developers, analysts, and enthusiasts looking to explore Web3 data workflows.
+lang: en
+sidebarDepth: 3
+---
+
+Understanding Ethereum data and analysis is easiest when you see it in action. Let's start with a simple yet practical scenario: a user makes a transaction, and we want to uncover the resulting actions (events) recorded on the blockchain.
+
+This example is tailored for a wide audience—whether you're an app developer looking to display transaction data in your application, a data analyst building metrics for a token insights report, or simply a user or enthusiast curious about how Ethereum data is processed.
+
+No matter your perspective, this example provides a hands-on way to understand Ethereum data extraction and transformation. For those who are not developers, don't worry—we'll include some code examples below, but focus on the comments and explanations that will break down the operations. The goal is to make the process clear and approachable for everyone, regardless of technical background.
+
+## Extracting a transaction {#extraction}
+
+```python
+# Import the helper libraries
+import requests  # Simplifies sending requests to APIs
+import json  # Parses JSON objects (structured text) from simple text strings
+from web3 import Web3  # Library with functions that help interact with Ethereum, used here for decoding
+
+# Define the connection to a node-as-a-service provider, avoiding the need to run our own node
+node_as_service_api_url = "https://eth.llamarpc.com"
+# Define the JSON-RPC method and the input parameters
+payload = {
+    "jsonrpc": "2.0",
+    "id": 1,
+    "method": "eth_getTransactionReceipt",
+    "params": ["0x2f8edd9bba379efa228bf8f39396e908d83ea418c602a3dd1bf178749c1714c0"]  # transaction_hash
+}
+# Send the data request, receive the response, and save it
+response = requests.post(url=node_as_service_api_url, json=payload)
+# Read the response
+transaction_receipt = response.json()['result']
+```
+
+Accessing blockchain data starts by connecting with a [client node](/developers/docs/nodes-and-clients/). Running your own node can be daunting, so this example uses a [node-as-a-service](/developers/docs/nodes-and-clients/nodes-as-a-service/) provider. This service allows us to request the same data, using the same methods, as if we were running a node ourselves. 
Although connecting directly to a self-hosted node via its port would yield the same results, the node-as-a-service approach is more accessible and easily replicable. + +Numerous methods are available via [JSON-RPC](/developers/docs/apis/json-rpc/) for requesting different types of data from an Ethereum node. For instance, you can retrieve a full block's information, query an account's ETH balance, or access transaction details. For those interested, a comprehensive list of methods and their expected outputs, which nodes must support, is available [here](https://ethereum.github.io/execution-apis/). + +In this example, we’ll use the `eth_getTransactionReceipt` method, which retrieves detailed information about a specific transaction using its unique identifier (`transaction_hash`). + +To handle the request, we use the `requests` library. The communication with the node, the call for data, and the storage of the response happen in a single line of code. The result is then translated into a [JSON object](https://en.wikipedia.org/wiki/JSON), which contains the transaction receipt—a structured summary of the transaction's execution details: + +```json +{ + "blockHash": "0xf3cc3aa91392fb6dc9d4200ce2640b278f658ce3be2cb3ace288a42bdadeeee9", + "blockNumber": "0xb504e1", + "from": "0x37918a209697488c3e4c25a8a7e7e21e5532adfb", + "to": "0x03ab458634910aad20ef5f1c8ee96f1d6ac54919", + "gasUsed": "0x909c", + "logs": [ + { + "address": "0x03ab458634910aad20ef5f1c8ee96f1d6ac54919", + "blockTimestamp": "0x602acc4c", + "logIndex": "0xad", + "removed": false, + "data": "0x000000000000000000000000000000000000000000000030ca024f987b900000", + "topics": [ + "0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef", + "0x00000000000000000000000037918a209697488c3e4c25a8a7e7e21e5532adfb", + "0x000000000000000000000000dd1693bd8e307ecfdbe51d246562fc4109f871f8" + ], + } + ] +} +``` + +The information retrieved contains many raw details about the transaction, such as the block number where it was included, the sender's address, and the amount of gas used. Additionally, it reveals that during the transaction execution, the address `0x03a...` emitted a single log containing pieces of information about an event that occurred. We identify it as the [RAI token](https://etherscan.io/token/0x03ab458634910aad20ef5f1c8ee96f1d6ac54919) because the `address` field (`0x03a...`) belongs to it. + +## Decoding the event{#decoding} + +The main challenge lies in the lack of context—what do the data field or the topic's values represent? Moreover, all the information is stored in bytes (hexadecimal format), making it difficult to interpret at a glance. + +To make sense of the log, we need to decode it. Decoding involves translating the raw log data into a human-readable format, complete with function and parameter names. This process relies on the contract [ABI (Application Binary Interface)](https://docs.soliditylang.org/en/latest/abi-spec.html), which serves as a translation guide for understanding the data structure. 
Here's the code to perform the decoding:
+
+```python
+# Extracting the log from the transaction_receipt
+transaction_log = transaction_receipt["logs"][0]
+# Manually inputting the ABI for the contract that emitted the log
+# Here we simplified it to only the piece we will use, the standard ERC20 Transfer event
+abi = [{
+    "anonymous": False,
+    "inputs": [
+        {"indexed": True, "name": "from", "type": "address"},
+        {"indexed": True, "name": "to", "type": "address"},
+        {"indexed": False, "name": "value", "type": "uint256"}
+    ],
+    "name": "Transfer",
+    "type": "event"
+}]
+
+# Using the web3 library to call a method (process_log) to decode the log using the ABI.
+decoded_log = Web3().eth.contract(address=Web3.to_checksum_address(transaction_log['address']), abi=abi).events.Transfer().process_log(transaction_log)
+```
+
+First, we extract the relevant log from the transaction receipt obtained earlier. This log contains raw, unstructured data emitted by a contract during the transaction.
+
+Next, we manually define the ABI for the contract that emitted the log. Since ABIs are not stored onchain, we must source them separately—either by checking platforms like Etherscan or compiling the contract code directly, if known. For simplicity, we've hardcoded a minimal ABI here, focusing only on the standard ERC20 `Transfer` event.
+
+With both the raw log and the ABI, we leverage the `web3` library to decode the log. Most coding languages have libraries that help programmers interact with Ethereum interfaces and data. This method simplifies the decoding process, translating raw hexadecimal data into meaningful information like the sender (`from`), receiver (`to`), and transfer amount (`value`).
+
+Here's the decoded result:
+
+```json
+{
+    "address": "0x03ab458634910aad20ef5f1c8ee96f1d6ac54919",
+    "event": "Transfer",
+    "logIndex": "0xad",
+    "args": {
+        "from": "0x37918A209697488c3E4C25a8A7e7E21E5532ADFB",
+        "to": "0xdD1693BD8E307eCfDbe51D246562fc4109f871f8",
+        "value": 900000000000000000000
+    }
+}
+```
+
+Now, we can see that the transaction transferred 900.000000000000000000 [RAI tokens](https://etherscan.io/token/0x03ab458634910aad20ef5f1c8ee96f1d6ac54919) (including decimals) from `0x379...` to `0xdD1...`.
+
+## Data Transformation {#transformation}
+
+While our example could end here, we want to highlight an essential aspect of blockchain data exploration: the need to incorporate external data that isn't part of the blockchain itself. This mirrors common practices in traditional (non-Web3) data analysis.
+
+In this example, we already encountered external data when sourcing the ABI. However, let's consider a simpler and more frequent scenario: determining the USD value of a token transfer amount. Token prices aren't recorded on the blockchain; instead, price discovery happens across various exchanges (both centralized, CEXes, and decentralized, DEXes). Price providers aggregate this information to offer consolidated price data.
+
+The following code demonstrates how to fetch this price data using a request-based process similar to the one we used for onchain data:
+
+```python
+# Define the connection with a price provider.
+price_api_url = "https://api.coingecko.com/api/v3/simple/price"
+# Define the request params.
+params = {
+    "ids": "rai",
+    "vs_currencies": "usd"
+}
+# Request, receive, and store the RAI price.
+rai_price = requests.get(price_api_url, params=params).json()
+# Transform the volume, evaluating its USD value.
+transfer_vol_in_current_price = decoded_log['args']['value'] / 10**18 * rai_price['rai']['usd']
+# Final output data.
+combined_final_data = f"Transaction hash: {transaction_receipt['transactionHash']}\n" \
+    f"Token Transferred Address: {decoded_log['address']}\n" \
+    f"From: {decoded_log['args']['from']}\n" \
+    f"To: {decoded_log['args']['to']}\n" \
+    f"Value Raw: {decoded_log['args']['value']}\n" \
+    f"Value: {decoded_log['args']['value'] / 10**18}\n" \
+    f"Price: {rai_price['rai']['usd']} USD\n" \
+    f"Current value transferred: {transfer_vol_in_current_price} USD"
+```
+
+```
+Transaction hash: 0x2f8edd9bba379efa228bf8f39396e908d83ea418c602a3dd1bf178749c1714c0
+Token Transferred Address: 0x03ab458634910aad20ef5f1c8ee96f1d6ac54919
+From: 0x37918A209697488c3E4C25a8A7e7E21E5532ADFB
+To: 0xdD1693BD8E307eCfDbe51D246562fc4109f871f8
+Value Raw: 900000000000000000000
+Value: 900.0
+Price: 3.03 USD
+Current value transferred: 2727.0 USD
+```
+
+This process produces a consolidated analysis of a single transaction. The same approach can be scaled to handle multiple transactions, with the results organized into a table with many transactions. Furthermore, additional data retrieved from the Ethereum node can be integrated, aggregated, and transformed into various metrics.
+
+## Code Wrap-up {#wrap-up}
+
+The final code demonstrates the entire workflow, and we'll now revisit it, this time highlighting in the code the key steps that will be explored in detail in the upcoming topics: client node, request pipelines, raw data, ABI db, decoding, offchain inputs, and transformed data.
+
+```python
+import requests
+import json
+from web3 import Web3
+### Client Node
+node_as_service_api_url = "https://eth.llamarpc.com"
+payload = {
+    "jsonrpc": "2.0",
+    "id": 1,
+    "method": "eth_getTransactionReceipt",
+    "params": ["0x2f8edd9bba379efa228bf8f39396e908d83ea418c602a3dd1bf178749c1714c0"]
+}
+### Request pipeline process
+response = requests.post(url=node_as_service_api_url, json=payload)
+### Raw data (transaction and log)
+transaction_receipt = response.json()['result']
+transaction_log = transaction_receipt["logs"][0]
+### ABI db Input
+abi = [{
+    "anonymous": False,
+    "inputs": [
+        {"indexed": True, "name": "from", "type": "address"},
+        {"indexed": True, "name": "to", "type": "address"},
+        {"indexed": False, "name": "value", "type": "uint256"}
+    ],
+    "name": "Transfer",
+    "type": "event"
+}]
+### Decoding Process
+decoded_log = Web3().eth.contract(address=Web3.to_checksum_address(transaction_log['address']), abi=abi).events.Transfer().process_log(transaction_log)
+### Offchain Price Input
+price_api_url = "https://api.coingecko.com/api/v3/simple/price"
+params = {
+    "ids": "rai",
+    "vs_currencies": "usd"
+}
+rai_price = requests.get(price_api_url, params=params).json()
+### Transformed Data
+transfer_vol_in_current_price = decoded_log['args']['value'] / 10**18 * rai_price['rai']['usd']
+combined_final_data = f"Transaction hash: {transaction_receipt['transactionHash']}\n" \
+    f"Token Transferred Address: {decoded_log['address']}\n" \
+    f"From: {decoded_log['args']['from']}\n" \
+    f"To: {decoded_log['args']['to']}\n" \
+    f"Value Raw: {decoded_log['args']['value']}\n" \
+    f"Value: {decoded_log['args']['value'] / 10**18}\n" \
+    f"Price: {rai_price['rai']['usd']} USD\n" \
+    f"Current value transferred: {transfer_vol_in_current_price} USD"
+```
\ No newline at end of file
diff --git a/public/content/developers/docs/data-and-analytics/outsourcing-data/1-node-as-a-service.png 
b/public/content/developers/docs/data-and-analytics/outsourcing-data/1-node-as-a-service.png new file mode 100644 index 00000000000..1b26b9082a2 Binary files /dev/null and b/public/content/developers/docs/data-and-analytics/outsourcing-data/1-node-as-a-service.png differ diff --git a/public/content/developers/docs/data-and-analytics/outsourcing-data/2-raw-streamer.png b/public/content/developers/docs/data-and-analytics/outsourcing-data/2-raw-streamer.png new file mode 100644 index 00000000000..b0df2e91ba6 Binary files /dev/null and b/public/content/developers/docs/data-and-analytics/outsourcing-data/2-raw-streamer.png differ diff --git a/public/content/developers/docs/data-and-analytics/outsourcing-data/3-decode-streamer.png b/public/content/developers/docs/data-and-analytics/outsourcing-data/3-decode-streamer.png new file mode 100644 index 00000000000..572a6192a5d Binary files /dev/null and b/public/content/developers/docs/data-and-analytics/outsourcing-data/3-decode-streamer.png differ diff --git a/public/content/developers/docs/data-and-analytics/outsourcing-data/4-Index-data-providers.png b/public/content/developers/docs/data-and-analytics/outsourcing-data/4-Index-data-providers.png new file mode 100644 index 00000000000..d0e6d6d0e49 Binary files /dev/null and b/public/content/developers/docs/data-and-analytics/outsourcing-data/4-Index-data-providers.png differ diff --git a/public/content/developers/docs/data-and-analytics/outsourcing-data/5-offchain-logs.png b/public/content/developers/docs/data-and-analytics/outsourcing-data/5-offchain-logs.png new file mode 100644 index 00000000000..9de19bda83f Binary files /dev/null and b/public/content/developers/docs/data-and-analytics/outsourcing-data/5-offchain-logs.png differ diff --git a/public/content/developers/docs/data-and-analytics/outsourcing-data/index.md b/public/content/developers/docs/data-and-analytics/outsourcing-data/index.md new file mode 100644 index 00000000000..979af243c37 --- /dev/null +++ b/public/content/developers/docs/data-and-analytics/outsourcing-data/index.md @@ -0,0 +1,95 @@ +--- +title: Outsourced Data Types +description: Nodes, RPCs and Indexers can provide reliable data streams to developers and blockchain users. However, outsourcing data to a provider comes with tradeoffs. Explore the data service layer for crypto +lang: en +sidebarDepth: 3 +--- + +In the ever-evolving landscape of blockchain technology, managing and interpreting vast amounts of data presents both a challenge and an opportunity. For developers and businesses looking to leverage blockchain without the complexity of managing their own nodes and infrastructure, outsourced data services have become a crucial solution. This chapter delves into various outsourced data types that have emerged to address these needs, offering scalable, reliable, and efficient methods to access and process blockchain data. + +## Node-as-a-Service {#node-as-a-service} + +![Node as a service ](./1-node-as-a-service.png) + +**What is it?** \- Running your own node can be challenging, especially when getting started or when you want to scale fast. These services run optimized node infrastructures for you, and provide an interface for you to request its data, so you can focus on developing your transformation or product instead. Oftentimes, node services will run a variety of chains and optimized types, so you don’t need to run multiple clients, allowing you to access full and archive nodes in addition to client-specific methods in one API. 
+
+**How does it work? \-** NaaS providers operate distributed node clients behind the scenes and typically offer an API interface for interacting with the blockchain. This can include dedicated nodes or shared nodes that balance request loads. While NaaS itself does not index data, it may offer optimization features and provide raw or decoded data streams upon request.
+
+**Examples:** Alchemy, Infura, Ankr, QuickNode, Tenderly
+
+**Data available:** Everything you can request from a node, including raw and more exotic onchain data.
+
+## Raw Stream {#raw-stream}
+
+![Raw Streamer](./2-raw-streamer.png)
+
+**What is it?** \- Customers often require data in large quantities and at a regular cadence to keep up with the chain tip. Raw Stream services provide a solution by offering standardized schemas for frequently requested data and delivering it through various interfaces and sinks.
+
+**How does it work? \-** Raw Stream services are similar to Node-as-a-Service but with the added capability of storing and serving large volumes of data. These providers maintain infrastructure designed to handle continuous data flows, ensuring that users can access real-time or near-real-time data efficiently.
+
+**Examples:** Major Node-as-a-Service providers such as Alchemy, Infura, Ankr, QuickNode, Tenderly, and Blocknative, as well as specialized decoded streamers, offer raw streaming capabilities.
+
+**Data available:**
+
+* Typically Blocks, Transactions, Traces, Logs
+* Mempool data
+* Blob Indexing
+* Beacon Chain
+* MEV data
+
+## Decoded Stream {#decoded-stream}
+
+![Decoded Streamer](./3-decode-streamer.png)
+
+**What is it? \-** Decoded streaming services take raw data streams a step further by providing human-readable logs, traces, and sometimes view function tables. This relatively new service simplifies working with complex blockchain data by translating it into formats that are easier to interpret and analyze.
+
+**How does it work? \-** Decoding data involves more than just accessing raw data from the nodes. Providers must maintain a comprehensive ABI (Application Binary Interface) database and keep a decoding system running. Since ABIs cannot be sourced directly from the blockchain, they are usually obtained from off-chain providers like Etherscan, where contracts are submitted for transparency and auditability. However, not all contracts have their ABIs available.
+
+There are two primary methods for decoding data:
+
+1. **Manual ABI Retrieval:** This method, pioneered by Dune, involves users filling out a form to provide an ABI or allowing the system to automatically download it from Etherscan. Only the requested contracts are decoded, with an exact ABI match, making this method highly reliable but dependent on a wide range of contract submissions to increase coverage.
+2. **Algorithmic Decoding:** This method uses algorithms to match log and function signatures (a log's topic0 or a function call's 4-byte selector) with any available ABIs. Although it involves complex logic, because contracts share a lot of code, the algorithm can identify ABIs that match contract signatures even if the exact ABI file is not available. This approach offers broader coverage but may require handling mismatches. This method is very useful for analytics since it allows exploration of contract data without prior knowledge of its existence. A small sketch of the matching idea follows this list.
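+
+A minimal sketch of that matching idea, assuming a small local dictionary of known event signatures: hash each human-readable signature and compare it with a log's topic0 (for function calls, the selector is the first 4 bytes of the same kind of hash).
+
+```python
+from web3 import Web3
+
+# A tiny signature "database"; real providers maintain huge collections of ABI fragments
+known_events = [
+    "Transfer(address,address,uint256)",
+    "Approval(address,address,uint256)",
+]
+
+# topic0 of a log is the keccak hash of the event's canonical signature
+signature_by_hash = {Web3.to_hex(Web3.keccak(text=sig)): sig for sig in known_events}
+
+observed_topic0 = "0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef"
+print(signature_by_hash.get(observed_topic0, "unknown signature"))  # -> Transfer(address,address,uint256)
+```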
+
+**Examples:** Dune, Flipside, Allium, Sonaverse, Bitquery, Chainbase
+
+**Data available:**
+
+* Decoded Traces, Decoded Logs, ERC20/721/1155 Transfer events
+* `balanceOf` and `totalSupply` view functions (Allium), triggered after a Transfer event
+
+## Indexed Data API {#indexed-data}
+
+![Index Data Providers](./4-Index-data-providers.png)
+
+**What is it? \-** Indexed Data APIs are the standard product of most data providers. They provide metrics (transformed data) through an API, often accompanied by an SDK for easier integration. These metrics can encompass a wide range of data, with providers continually expanding their offerings to include new and specialized metrics as each provider tries to open up new niches.
+
+**How does it work?** While it might seem that Indexed Data Providers are an evolution of the previous services, they actually serve a different function. Although the data still undergoes the entire flow described in the first post of this series, Indexed Data Providers typically operate with a more streamlined stack. They focus on extracting, transforming, loading, and delivering specific metrics rather than serving decoded, raw, or other data requests from their clients. The previous types of services are designed for scalability, offering a standardized and broad range of data without transformation, whereas Indexed Data Providers are optimized for providing specific, pre-processed metrics.
+
+Moreover, the streaming services discussed earlier can only provide raw and decoded data. If the metrics offered by an Indexed Data Provider require "Other Onchain Data," their systems must make additional node requests—either through their own nodes or by relying on external nodes—to source the necessary data according to their specifications.
+
+**Examples:** Nansen, DefiLlama, Artemis, Bitquery, Arkham, Blockworks, Glassnode, Messari, Token Terminal
+
+**Data available:**
+
+* Network and Protocol Metrics (DEXes, lending, derivatives and perps, DEX aggregators, etc.)
+* NFTs
+* Wallet interactions and balances
+* Mempool
+* Bridges
+* Staking, etc.
+
+## Offchain Logs
+
+![Offchain logs](./5-offchain-logs.png)
+
+**What is it? \-** Offchain logs are similar to regular logs, but instead of being emitted by the contracts deployed on the blockchain, they are emitted by modifying a contract's source code locally and then submitting it to a special archive node. This node re-executes the blockchain history up to the current head, but runs the contract's new code version instead. Companies offering shadow logs provide the data in a format similar to how raw or decoded logs are delivered.
+
+**How does it work? \-** Unlike regular logs, which must be written into the original contract code, offchain logs can be created by anyone at any time, not just the deployer. This flexibility enables the creation of logs and metrics that were not present in the original contract deployment. Offchain logs can thus access a wide range of data types, including parts of the state that would otherwise be much more complex to read, such as internal functions and internal variables. This makes offchain logs a powerful tool for in-depth data analysis.
+ +**Examples:** Shadow Logs, Ghost Logs + +## Conclusion + +Understanding what services can be outsourced have become essential for navigating the vast and complex landscape of crypto data. From running nodes to providing raw, decoded, and indexed data, these services allow developers and businesses to access the blockchain efficiently without managing their own infrastructure. Each type of outsourced data offers unique trade-offs that cater to different needs. By leveraging these services, users can focus on building and scaling their applications, while the heavy lifting of data infrastructure is handled by specialized providers. As the ecosystem continues to evolve, the reliance on keeping these outsourced solutions accessible will only grow, making them a cornerstone of the crypto data landscape. diff --git a/src/data/developer-docs-links.yaml b/src/data/developer-docs-links.yaml index cb0d5b5ccf5..7d66566508f 100644 --- a/src/data/developer-docs-links.yaml +++ b/src/data/developer-docs-links.yaml @@ -154,6 +154,14 @@ items: - id: docs-nav-block-explorers href: /developers/docs/data-and-analytics/block-explorers/ + - id: docs-nav-extraction-tranformation-example + href: /developers/docs/data-and-analytics/extraction-tranformation-example/ + - id: docs-nav-data-flow + href: /developers/docs/data-and-analytics/data-flow/ + - id: docs-nav-outsourcing-data + href: /developers/docs/data-and-analytics/outsourcing-data/ + - id: docs-nav-data-stacks + href: /developers/docs/data-and-analytics/data-stacks/ - id: docs-nav-storage href: /developers/docs/storage/ description: docs-nav-storage-description diff --git a/src/lib/utils/md.ts b/src/lib/utils/md.ts index 0925b5986c1..452d2238493 100644 --- a/src/lib/utils/md.ts +++ b/src/lib/utils/md.ts @@ -78,6 +78,10 @@ const getPostSlugs = (dir: string, files: string[] = []) => { "/developers/docs/dapps", "/developers/docs/data-and-analytics", "/developers/docs/data-and-analytics/block-explorers", + "/developers/docs/data-and-analytics/extraction-tranformation-example", + "/developers/docs/data-and-analytics/data-flow", + "/developers/docs/data-and-analytics/outsourcing-data", + "/developers/docs/data-and-analytics/data-stacks", "/developers/docs/data-availability", "/developers/docs/data-availability/blockchain-data-storage-strategies", "/developers/docs/data-structures-and-encoding",