• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

getdozer / dozer / 6105410942

07 Sep 2023 04:28AM UTC coverage: 77.562% (-0.1%) from 77.686%
6105410942

push

github

chloeminkyung
feat: onnx image

1141 of 1141 new or added lines in 66 files covered. (100.0%)

49957 of 64409 relevant lines covered (77.56%)

50900.25 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

97.46
/dozer-cli/src/simple/executor.rs
1
use dozer_api::grpc::internal::internal_pipeline_server::LogEndpoint;
2
use dozer_cache::dozer_log::camino::Utf8Path;
3
use dozer_cache::dozer_log::home_dir::{BuildPath, HomeDir};
4
use dozer_cache::dozer_log::replication::Log;
5
use dozer_core::checkpoint::{CheckpointFactory, CheckpointFactoryOptions, OptionCheckpoint};
6
use dozer_tracing::LabelsAndProgress;
7
use dozer_types::models::api_endpoint::ApiEndpoint;
8
use dozer_types::models::flags::Flags;
9
use dozer_types::parking_lot::Mutex;
10
use tokio::runtime::Runtime;
11

12
use std::sync::{atomic::AtomicBool, Arc};
13

14
use dozer_types::models::source::Source;
15
use dozer_types::models::udf_config::UdfConfig;
16

17
use crate::pipeline::PipelineBuilder;
18
use crate::shutdown::ShutdownReceiver;
19
use dozer_core::executor::{DagExecutor, ExecutorOptions};
20

21
use dozer_types::models::connection::Connection;
22

23
use crate::errors::{BuildError, OrchestrationError};
24

25
use super::Contract;
26

27
/// Everything needed to build a [`DagExecutor`] for a pipeline.
///
/// The borrowed fields are configuration owned by the caller; the checkpoint
/// state and the per-endpoint logs are created by [`Executor::new`].
pub struct Executor<'a> {
    /// Connections forwarded to the `PipelineBuilder`.
    connections: &'a [Connection],
    /// Sources forwarded to the `PipelineBuilder`.
    sources: &'a [Source],
    /// Optional SQL text forwarded to the `PipelineBuilder`.
    sql: Option<&'a str>,
    /// Checkpoint factory created in `new`; handed over to the `DagExecutor`.
    checkpoint_factory: Arc<CheckpointFactory>,
    /// Checkpoint returned alongside the factory in `new`; handed over to the
    /// `DagExecutor`.
    checkpoint: OptionCheckpoint,
    /// `ApiEndpoint` and its log.
    endpoint_and_logs: Vec<(ApiEndpoint, LogEndpoint)>,
    /// Labels forwarded to the `PipelineBuilder`.
    labels: LabelsAndProgress,
    /// UDF configurations forwarded to the `PipelineBuilder`.
    udfs: &'a [UdfConfig],
}
38

39
impl<'a> Executor<'a> {
40
    // TODO: Refactor this to not require both `contract` and all of
41
    // connections, sources and sql
42
    #[allow(clippy::too_many_arguments)]
43
    pub async fn new(
30✔
44
        home_dir: &'a HomeDir,
30✔
45
        contract: &Contract,
30✔
46
        connections: &'a [Connection],
30✔
47
        sources: &'a [Source],
30✔
48
        sql: Option<&'a str>,
30✔
49
        api_endpoints: &'a [ApiEndpoint],
30✔
50
        checkpoint_factory_options: CheckpointFactoryOptions,
30✔
51
        labels: LabelsAndProgress,
30✔
52
        udfs: &'a [UdfConfig],
30✔
53
    ) -> Result<Executor<'a>, OrchestrationError> {
30✔
54
        // Find the build path.
55
        let build_path = home_dir
30✔
56
            .find_latest_build_path()
30✔
57
            .map_err(|(path, error)| OrchestrationError::FileSystem(path.into(), error))?
30✔
58
            .ok_or(OrchestrationError::NoBuildFound)?;
30✔
59

60
        // Load pipeline checkpoint.
61
        let (checkpoint_factory, last_checkpoint, _) =
30✔
62
            CheckpointFactory::new(build_path.data_dir.to_string(), checkpoint_factory_options)
30✔
63
                .await?;
30✔
64

65
        let mut endpoint_and_logs = vec![];
30✔
66
        for endpoint in api_endpoints {
60✔
67
            let log_endpoint = create_log_endpoint(
30✔
68
                contract,
30✔
69
                &build_path,
30✔
70
                &endpoint.name,
30✔
71
                &checkpoint_factory,
30✔
72
                last_checkpoint.num_slices(),
30✔
73
            )
30✔
74
            .await?;
30✔
75
            endpoint_and_logs.push((endpoint.clone(), log_endpoint));
30✔
76
        }
77

78
        Ok(Executor {
30✔
79
            connections,
30✔
80
            sources,
30✔
81
            sql,
30✔
82
            checkpoint_factory: Arc::new(checkpoint_factory),
30✔
83
            checkpoint: last_checkpoint,
30✔
84
            endpoint_and_logs,
30✔
85
            labels,
30✔
86
            udfs,
30✔
87
        })
30✔
88
    }
30✔
89

90
    pub fn endpoint_and_logs(&self) -> &[(ApiEndpoint, LogEndpoint)] {
30✔
91
        &self.endpoint_and_logs
30✔
92
    }
30✔
93

94
    pub async fn create_dag_executor(
30✔
95
        self,
30✔
96
        runtime: &Arc<Runtime>,
30✔
97
        executor_options: ExecutorOptions,
30✔
98
        shutdown: ShutdownReceiver,
30✔
99
        flags: Flags,
30✔
100
    ) -> Result<DagExecutor, OrchestrationError> {
30✔
101
        let builder = PipelineBuilder::new(
30✔
102
            self.connections,
30✔
103
            self.sources,
30✔
104
            self.sql,
30✔
105
            self.endpoint_and_logs
30✔
106
                .into_iter()
30✔
107
                .map(|(endpoint, log)| (endpoint, Some(log.log)))
30✔
108
                .collect(),
30✔
109
            self.labels.clone(),
30✔
110
            flags,
30✔
111
            self.udfs,
30✔
112
        );
30✔
113

114
        let dag = builder.build(runtime, shutdown).await?;
60✔
115
        let exec = DagExecutor::new(
30✔
116
            dag,
30✔
117
            self.checkpoint_factory,
30✔
118
            self.checkpoint,
30✔
119
            executor_options,
30✔
120
        )
30✔
121
        .await?;
×
122

123
        Ok(exec)
30✔
124
    }
30✔
125
}
126

127
pub fn run_dag_executor(
30✔
128
    dag_executor: DagExecutor,
30✔
129
    running: Arc<AtomicBool>,
30✔
130
    labels: LabelsAndProgress,
30✔
131
) -> Result<(), OrchestrationError> {
30✔
132
    let join_handle = dag_executor.start(running, labels)?;
30✔
133
    join_handle
30✔
134
        .join()
30✔
135
        .map_err(OrchestrationError::ExecutionError)
30✔
136
}
30✔
137

138
async fn create_log_endpoint(
30✔
139
    contract: &Contract,
30✔
140
    build_path: &BuildPath,
30✔
141
    endpoint_name: &str,
30✔
142
    checkpoint_factory: &CheckpointFactory,
30✔
143
    num_persisted_entries_to_keep: usize,
30✔
144
) -> Result<LogEndpoint, OrchestrationError> {
30✔
145
    let endpoint_path = build_path.get_endpoint_path(endpoint_name);
30✔
146

147
    let schema = contract
30✔
148
        .endpoints
30✔
149
        .get(endpoint_name)
30✔
150
        .ok_or_else(|| BuildError::MissingEndpoint(endpoint_name.to_owned()))?;
30✔
151
    let schema_string =
30✔
152
        dozer_types::serde_json::to_string(schema).map_err(BuildError::SerdeJson)?;
30✔
153

154
    let descriptor_bytes = tokio::fs::read(&build_path.descriptor_path)
30✔
155
        .await
30✔
156
        .map_err(|e| {
30✔
157
            OrchestrationError::FileSystem(build_path.descriptor_path.clone().into(), e)
×
158
        })?;
30✔
159

160
    let log_prefix = AsRef::<Utf8Path>::as_ref(checkpoint_factory.prefix())
30✔
161
        .join(&endpoint_path.log_dir_relative_to_data_dir);
30✔
162
    let log = Log::new(
30✔
163
        checkpoint_factory.storage(),
30✔
164
        log_prefix.into(),
30✔
165
        num_persisted_entries_to_keep,
30✔
166
    )
30✔
167
    .await?;
×
168
    let log = Arc::new(Mutex::new(log));
30✔
169

30✔
170
    Ok(LogEndpoint {
30✔
171
        build_id: build_path.id.clone(),
30✔
172
        schema_string,
30✔
173
        descriptor_bytes,
30✔
174
        log,
30✔
175
    })
30✔
176
}
30✔
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc