Keyboard shortcuts

Press ← or → to navigate between chapters

Press S or / to search in the book

Press ? to show this help

Press Esc to hide this help

API Reference

Module Overview

graph TB
    subgraph "Public API"
        A[lib.rs] --> B[data::KoiDataset]
        A --> C[features::FeatureEngineer]
        A --> D[two_stage_model::TwoStageClassifier]
        A --> E[evaluation::ModelEvaluator]
        A --> F[report::generate_report]
        A --> G[logger::Logger]
    end

    subgraph "Internal Modules"
        H[decision_tree.rs]
        I[model.rs]
    end

    D --> H
    D --> I

data::KoiDataset

Loads and manages the KOI dataset from CSV using Polars.

graph LR
    A[CSV File] -->|Polars| B[KoiDataset]
    B --> C[n_samples]
    B --> D[n_features]
    B --> E[features Array2]
    B --> F[labels Array1]
    B --> G[class_distribution]

Methods

| Method | Signature | Description |
|--------|-----------|-------------|
| `load` | `load(path: &str) -> Result<KoiDataset>` | Load dataset from CSV |
| `n_samples` | `n_samples() -> usize` | Number of rows |
| `n_features` | `n_features() -> usize` | Number of feature columns |
| `features` | `features() -> &Array2<f64>` | Feature matrix |
| `labels` | `labels() -> &Array1<u8>` | Label vector |
| `class_distribution` | `class_distribution() -> HashMap<String, usize>` | Count per class |
| `feature_index` | `feature_index(name: &str) -> Option<usize>` | Column index by name |

Usage

#![allow(unused)]
fn main() {
use astrophage::data::KoiDataset;

let dataset = KoiDataset::load("data/koi_dataset.csv")?;
println!("Loaded {} samples with {} features", 
    dataset.n_samples(), 
    dataset.n_features()
);

let dist = dataset.class_distribution();
for (class, count) in &dist {
    println!("{}: {}", class, count);
}
}

features::FeatureEngineer

Transforms raw data into model-ready features through imputation, standardization, and derived feature computation.

graph LR
    A[KoiDataset] -->|process| B[FeatureEngineer]
    B --> C[Imputation]
    C --> D[Standardization]
    D --> E[Derived Features]
    E --> F[ProcessedDataset]
    F --> G[split]
    G --> H[Train Set]
    G --> I[Test Set]

Methods

| Method | Signature | Description |
|--------|-----------|-------------|
| `new` | `new() -> FeatureEngineer` | Create new engineer |
| `process` | `process(&mut self, dataset: &KoiDataset) -> Result<ProcessedDataset>` | Full pipeline |

ProcessedDataset Methods

| Method | Signature | Description |
|--------|-----------|-------------|
| `n_samples` | `n_samples() -> usize` | Number of rows |
| `n_features` | `n_features() -> usize` | Number of columns |
| `features` | `features() -> &Array2<f64>` | Feature matrix |
| `labels` | `labels() -> &Array1<u8>` | Label vector |
| `feature_names` | `feature_names() -> &[String]` | Column names |
| `split` | `split(test_ratio: f64, seed: u64) -> (ProcessedDataset, ProcessedDataset)` | Stratified split |

Usage

#![allow(unused)]
fn main() {
use astrophage::features::FeatureEngineer;

let mut engineer = FeatureEngineer::new();
let processed = engineer.process(&dataset)?;

let (train, test) = processed.split(0.2, 42); // 80/20, seed=42
println!("Train: {}, Test: {}", train.n_samples(), test.n_samples());
}

two_stage_model::TwoStageClassifier

The main two-stage random forest classifier.

graph TB
    A[TwoStageClassifier] --> B[Stage 1 RF]
    A --> C[Stage 2 RF]
    B --> D[train Stage 1]
    C --> E[train Stage 2]
    D --> F[predict]
    E --> F
    F --> G[feature_importance]

Methods

| Method | Signature | Description |
|--------|-----------|-------------|
| `new` | `new() -> TwoStageClassifier` | Create new classifier |
| `train` | `train(&mut self, train: &ProcessedDataset) -> Result<()>` | Train both stages |
| `predict` | `predict(&self, features: &Array2<f64>) -> Vec<u8>` | Predict labels |
| `predict_proba` | `predict_proba(&self, features: &Array2<f64>) -> Vec<Vec<f64>>` | Predict probabilities |
| `feature_importance` | `feature_importance() -> Vec<(String, f64)>` | Feature importance scores |

Usage

#![allow(unused)]
fn main() {
use astrophage::two_stage_model::TwoStageClassifier;

let mut classifier = TwoStageClassifier::new();
classifier.train(&train)?;

// Predictions
let predictions = classifier.predict(test.features());

// Feature importance
for (name, score) in classifier.feature_importance().iter().take(10) {
    println!("{}: {:.4}", name, score);
}
}

evaluation::ModelEvaluator

Computes comprehensive classification metrics.

graph LR
    A[TwoStageClassifier] -->|+ Test Data| B[ModelEvaluator]
    B --> C[Accuracy]
    B --> D[Precision]
    B --> E[Recall]
    B --> F[F1-Score]
    B --> G[Per-Class Metrics]

Methods

| Method | Signature | Description |
|--------|-----------|-------------|
| `new` | `new(classifier: &TwoStageClassifier, test: &ProcessedDataset) -> ModelEvaluator` | Create evaluator |
| `evaluate` | `evaluate(&self) -> Result<Metrics>` | Compute all metrics |

Metrics Structure

#![allow(unused)]
fn main() {
pub struct Metrics {
    pub accuracy: f64,
    pub macro_f1: f64,
    pub weighted_f1: f64,
    pub per_class: HashMap<String, ClassMetrics>,
}

pub struct ClassMetrics {
    pub precision: f64,
    pub recall: f64,
    pub f1_score: f64,
}
}

Usage

#![allow(unused)]
fn main() {
use astrophage::evaluation::ModelEvaluator;

let evaluator = ModelEvaluator::new(&classifier, &test);
let metrics = evaluator.evaluate()?;

println!("Accuracy: {:.4}", metrics.accuracy);
println!("Macro F1: {:.4}", metrics.macro_f1);

for (class, m) in &metrics.per_class {
    println!("{}: P={:.4} R={:.4} F1={:.4}",
        class, m.precision, m.recall, m.f1_score);
}
}

report::generate_report

Generates the comprehensive JSON report.

graph LR
    A[Metrics] -->|+ Classifier| B[generate_report]
    B --> C[report.json]
    C --> D[Feature Importance]
    C --> E[Per-Class Metrics]
    C --> F[Astrophysical Insights]
    C --> G[Recommendations]

Function

#![allow(unused)]
fn main() {
pub fn generate_report(
    metrics: &Metrics, 
    classifier: &TwoStageClassifier
) -> Result<()>
}

Output: output/report.json

Report Structure

{
  "project_name": "Astrophage",
  "version": "0.2.0",
  "summary": { ... },
  "metrics": { ... },
  "feature_importance": [ ... ],
  "astrophysical_insights": [ ... ],
  "recommendations": [ ... ]
}

logger::Logger

Structured logging with tracing.

graph LR
    A[Logger] --> B[Console Output]
    A --> C[File Output]
    B --> D[Colored Logs]
    C --> E[app.log]

Methods

| Method | Signature | Description |
|--------|-----------|-------------|
| `init` | `init(console: bool) -> Result<()>` | Initialize logger |

Usage

#![allow(unused)]
fn main() {
use astrophage::logger::Logger;

Logger::init(true).await?;
tracing::info!("Training started...");
}

Internal: decision_tree::DecisionTree

Custom decision tree implementation using Gini impurity.

graph TD
    A[DecisionTree] --> B[fit]
    B --> C[find_best_split]
    C --> D[compute_gini]
    D --> E[split_node]
    E --> F[recurse_left]
    E --> G[recurse_right]
    F --> H[Leaf or Split]
    G --> H

Key Parameters

| Parameter | Default | Description |
|-----------|---------|-------------|
| `max_depth` | 10 | Maximum tree depth |
| `min_samples_leaf` | 5 | Minimum samples per leaf |
| `max_features` | sqrt(n) | Features considered per split |

Internal: model::RandomForest

Ensemble of decision trees with bootstrap sampling.

graph TD
    A[RandomForest] --> B[n_estimators: 100]
    B --> C[Tree 1]
    B --> D[Tree 2]
    B --> E[...]
    B --> F[Tree N]
    C --> G[Majority Vote]
    D --> G
    F --> G
    G --> H[Final Prediction]

Key Parameters

| Parameter | Default | Description |
|-----------|---------|-------------|
| `n_estimators` | 100 | Number of trees |
| `max_depth` | 10 | Max depth per tree |
| `max_features` | sqrt(n) | Feature subsampling ratio |
| `bootstrap` | true | Use bootstrap sampling |

Data Schema

Expected columns in koi_dataset.csv:

Orbital Parameters

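| Column | Unit | Description |
|--------|------|-------------|
| `koi_period` | days | Orbital period |
| `koi_duration` | hours | Transit duration |
| `koi_depth` | ppm | Transit depth |
| `koi_impact` | — | Impact parameter |
| `koi_ingress` | hours | Ingress duration |
| `koi_incl` | deg | Orbital inclination |
| `koi_eccen` | — | Eccentricity |
| `koi_sma` | AU | Semi-major axis |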

Physical Parameters

| Column | Unit | Description |
|--------|------|-------------|
| `koi_ror` | — | Radius ratio (planet/star) |
| `koi_prad` | R⊕ | Planetary radius |
| `koi_teq` | K | Equilibrium temperature |
| `koi_insol` | Earth flux | Insolation flux |

Signal Quality

| Column | Description |
|--------|-------------|
| `koi_model_snr` | Signal-to-noise ratio |
| `koi_count` | Number of KOIs in system |
| `koi_num_transits` | Number of detected transits |
| `koi_max_sngle_ev` | Max single event statistic |
| `koi_max_mult_ev` | Max multiple event statistic |

False Positive Flags

| Column | Description |
|--------|-------------|
| `koi_fpflag_nt` | Not Transit-like |
| `koi_fpflag_ss` | Stellar Eclipse |
| `koi_fpflag_co` | Centroid Offset |
| `koi_fpflag_ec` | Ephemeris Match |

Stellar Parameters

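| Column | Unit | Description |
|--------|------|-------------|
| `koi_kepmag` | mag | Kepler magnitude |
| `koi_dor` | — | Planet–star distance over stellar radius (a/R★) |
| `koi_srho` | g/cm³ | Stellar density |
| `koi_steff` | K | Stellar effective temperature |
| `koi_slogg` | cm/s² | Surface gravity (log) |
| `koi_smet` | dex | Metallicity |
| `koi_srad` | R☉ | Stellar radius |
| `koi_smass` | M☉ | Stellar mass |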