Initial commit

This commit is contained in:
Augusto Gunsch
2021-12-25 19:30:14 -03:00
commit b918498930
33 changed files with 37200 additions and 0 deletions

113
src/database.rs Normal file
View File

@@ -0,0 +1,113 @@
use std::fs;
use reqwest;
use rusqlite::Connection;
use rusqlite::params;
use crate::language::Language;
use crate::entry::WiktionaryEntries;
/// A database of Wiktionary entries
pub struct WordDb {
// Filesystem path of the SQLite database file; a fresh `Connection`
// is opened from it on every call to `connect`.
connection: String
}
impl WordDb {
/// Builds a database handle for the SQLite file at `db_path`.
///
/// Only the path is stored here; no connection is opened until
/// `connect` is called.
pub fn new(db_path: &str) -> Self {
    Self { connection: db_path.to_string() }
}
/// Opens a new SQLite connection to the configured database file.
///
/// # Panics
/// Panics if the database file cannot be opened (e.g. bad path or
/// permissions) — `expect` gives a clearer diagnostic than a bare unwrap.
pub fn connect(&self) -> Connection {
    Connection::open(&self.connection)
        .expect("failed to open SQLite database file")
}
/// Drops and recreates this language's tables (`{code}_types` and
/// `{code}_words`) inside a single transaction, re-inserting the
/// language's word types.
pub fn clean_tables(&mut self, lang: &Language) {
let mut connection = self.connect();
let transaction = connection.transaction().unwrap();
// NOTE(review): `lang.code` is spliced into SQL identifiers; it is
// assumed to be a trusted internal code, not user input — confirm at
// call sites.
transaction.execute(&format!("DROP TABLE IF EXISTS {}_words", &lang.code), []).unwrap();
transaction.execute(&format!("DROP TABLE IF EXISTS {}_types", &lang.code), []).unwrap();
// Lookup table of word types (parts of speech).
transaction.execute(&format!("
CREATE TABLE {}_types (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
name TINYTEXT UNIQUE NOT NULL
)", &lang.code), []).unwrap();
// Seed the lookup table with the language's known types.
for type_ in &lang.types {
transaction.execute(&format!("
INSERT INTO {}_types ( name )
VALUES (
?
)", &lang.code), [type_]).unwrap();
}
// Main words table; `type_id` points into the types table above.
transaction.execute(&format!("
CREATE TABLE {}_words (
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
word TINYTEXT UNIQUE NOT NULL,
type_id INTEGER NOT NULL,
content MEDIUMTEXT NOT NULL,
FOREIGN KEY (type_id)
REFERENCES {}_types (id)
)", &lang.code, &lang.code), []).unwrap();
transaction.commit().unwrap();
}
/// Inserts all `entries` into `{code}_words` inside one transaction.
///
/// The INSERT statement is prepared once and reused for every entry,
/// instead of being re-parsed by SQLite on each iteration.
pub fn insert_entries(&mut self, lang: &Language, entries: WiktionaryEntries) {
    let mut connection = self.connect();
    let transaction = connection.transaction().unwrap();
    {
        // Prepare once; executing a cached statement avoids re-parsing
        // the SQL text for every row.
        let mut statement = transaction.prepare(&format!("
            INSERT INTO {}_words ( word, content, type_id )
            VALUES (
                ?, ?,
                (SELECT id FROM {}_types WHERE name = ?)
            )", &lang.code, &lang.code)).unwrap();
        for entry in entries {
            statement.execute(params![entry.word,
                                      entry.parsed_json.to_string(),
                                      entry.type_]).unwrap();
        }
        // `statement` borrows `transaction`, so it must be dropped
        // (end of this scope) before the commit below.
    }
    transaction.commit().unwrap();
}
/// Downloads (or reads from a local cache file) the kaikki.org
/// dictionary dump for `lang` and rebuilds its tables from it.
///
/// The dump name and URL are derived from `lang.code` (first letter
/// upper-cased, matching kaikki.org's naming) instead of being
/// hard-coded to Polish, so other languages can be upgraded too.
///
/// # Panics
/// Panics if the download, the cache write, or any database step fails.
pub async fn upgrade_lang(&mut self, lang: &Language) {
    // kaikki.org capitalizes the language name: "polish" -> "Polish".
    let mut dict_name = lang.code.clone();
    if let Some(first) = dict_name.get_mut(0..1) {
        first.make_ascii_uppercase();
    }
    let cache_file = format!("{}.json", dict_name);

    println!("Trying to read cached data...");
    let cached_data = fs::read_to_string(&cache_file);

    // Start the request before cleaning tables so the download runs
    // concurrently with the (synchronous) table setup below.
    let request = if cached_data.is_err() {
        Some(reqwest::get(format!(
            "https://kaikki.org/dictionary/{0}/kaikki.org-dictionary-{0}.json",
            dict_name
        )))
    } else {
        None
    };

    println!("Cleaning tables...");
    self.clean_tables(lang);

    let data = match request {
        Some(request) => {
            // The request was actually fired earlier; we only await it here.
            println!("Requesting data...");
            let data = request.await.unwrap().text().await.unwrap();
            println!("Caching data...");
            fs::write(&cache_file, &data).unwrap();
            data
        }
        None => cached_data.unwrap(),
    };

    println!("Parsing data...");
    let entries = WiktionaryEntries::parse_data(data)
        .merge_duplicates();

    println!("Inserting data...");
    self.insert_entries(lang, entries);

    println!("Done");
}
}

136
src/entry.rs Normal file
View File

@@ -0,0 +1,136 @@
use std::cmp;
use std::iter::IntoIterator;
use json::JsonValue::{Object, Short, Array};
use json::JsonValue;
#[derive (Clone)]
#[derive (Debug)]
/// One Wiktionary entry: a headword plus its parsed JSON payload.
pub struct WiktionaryEntry {
// The headword itself.
pub word: String,
// Part of speech, taken from the JSON "pos" field.
pub type_: String,
// The parsed JSON object for this entry (becomes an array of
// objects after duplicates are merged).
pub parsed_json: JsonValue
}
// Equality and ordering deliberately consider only `word`, so entries
// for the same headword compare equal and sort adjacently — this is
// what `WiktionaryEntries::merge_duplicates` relies on.
impl cmp::PartialEq for WiktionaryEntry {
fn eq(&self, other: &Self) -> bool {
self.word.eq(&other.word)
}
}
impl cmp::Eq for WiktionaryEntry {}
impl cmp::PartialOrd for WiktionaryEntry {
fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
// Delegate to the total order below (idiomatic for Ord types).
Some(self.cmp(other))
}
}
impl cmp::Ord for WiktionaryEntry {
fn cmp(&self, other: &Self) -> cmp::Ordering {
self.word.cmp(&other.word)
}
}
impl WiktionaryEntry {
/// Combines two entries for the same word into a single entry whose
/// JSON payload is an array holding both payloads.
fn merge(first: Self, second: Self) -> Self {
    let parsed_json = match first.parsed_json {
        // Already an array from an earlier merge: append the new payload.
        Array(mut items) => {
            items.push(second.parsed_json);
            JsonValue::Array(items)
        }
        // Two plain objects: start a fresh two-element array.
        Object(_) => JsonValue::Array(vec![first.parsed_json, second.parsed_json]),
        _ => panic!("Expected array or object, found {}", first.parsed_json.pretty(8)),
    };
    Self {
        word: first.word,
        type_: first.type_,
        parsed_json,
    }
}
pub fn parse(unparsed_json: &str) -> Self {
let json = json::parse(unparsed_json).unwrap();
let (word, type_) = match &json {
Object(o) => (
match o.get("word") {
Some(w) => match w {
Short(s) => s.to_string(),
JsonValue::String(s) => s.clone(),
_ => panic!("Not a string: {}", w.pretty(8))
},
None => panic!("No field 'word': {}", o.pretty(8))
},
match o.get("pos") {
Some(w) => match w {
Short(s) => s.to_string(),
JsonValue::String(s) => s.clone(),
_ => panic!("Not a string: {}", w.pretty(8))
},
None => panic!("No field 'pos': {}", o.pretty(8))
}
),
_ => panic!("Not an object: {}", json.pretty(8))
};
Self {
word,
type_,
parsed_json: json
}
}
}
/// A collection of parsed Wiktionary entries (newtype over `Vec`).
pub struct WiktionaryEntries(Vec<WiktionaryEntry>);
impl WiktionaryEntries {
/// Parses a whole kaikki.org dump: one JSON entry per line.
pub fn parse_data(data: String) -> Self {
    Self(data.lines().map(WiktionaryEntry::parse).collect())
}
/// Sorts the entries by word and merges consecutive entries sharing a
/// word into one entry (see `WiktionaryEntry::merge`).
///
/// An empty collection is returned unchanged rather than panicking on
/// the missing first element.
pub fn merge_duplicates(mut self) -> Self {
    self.0.sort();
    let mut entries = self.0.into_iter();
    let mut new_entries = Vec::new();
    // Guard instead of `.next().unwrap()`: an empty dump must not panic.
    if let Some(mut last_entry) = entries.next() {
        for entry in entries {
            if last_entry == entry {
                // Same word (equality compares `word` only): fold together.
                last_entry = WiktionaryEntry::merge(last_entry, entry);
            } else {
                new_entries.push(last_entry);
                last_entry = entry;
            }
        }
        new_entries.push(last_entry);
    }
    self.0 = new_entries;
    self
}
}
// Allows consuming iteration (`for entry in entries`), as used by
// database insertion.
impl IntoIterator for WiktionaryEntries {
type Item = WiktionaryEntry;
type IntoIter = std::vec::IntoIter<Self::Item>;
fn into_iter(self) -> Self::IntoIter {
self.0.into_iter()
}
}

14
src/language.rs Normal file
View File

@@ -0,0 +1,14 @@
/// A language supported by the word database.
#[derive(Debug)]
pub struct Language {
    /// Short language code used as the table-name prefix (e.g. "polish").
    pub code: String,
    /// Word types (parts of speech) this language's entries may carry.
    pub types: Vec<String>,
}

impl Language {
    /// Creates a new language description.
    ///
    /// `code` accepts anything convertible into a `String`, so both
    /// `&str` and `String` callers work (backward compatible).
    pub fn new(code: impl Into<String>, types: Vec<String>) -> Self {
        Self {
            code: code.into(),
            types,
        }
    }
}

94
src/main.rs Normal file
View File

@@ -0,0 +1,94 @@
//mod database;
use rocket::routes;
use rocket::fs::FileServer;
use rocket::data::{Limits, ToByteUnit};
use clap::{App, AppSettings, Arg, SubCommand};
//use database::WordDb;
mod database;
mod language;
mod entry;
mod routes;
use database::WordDb;
use language::Language;
#[rocket::main]
async fn main() {
    // Command-line interface: upgrade / run / list / passwd subcommands.
    let matches = App::new("inflectived")
        .version("0.1")
        .author("Augusto Gunsch <augustogunsch@tutanota.com>")
        .about("inflective daemon")
        .subcommands(vec![
            SubCommand::with_name("upgrade")
                .about("Upgrade or install a language database")
                .arg(
                    Arg::with_name("LANG")
                        .required(true)
                        .index(1)
                        .help("Language database to upgrade"),
                ),
            SubCommand::with_name("run").about("Run the daemon").arg(
                Arg::with_name("port")
                    .short("p")
                    .long("port")
                    .value_name("PORT")
                    .help("Port to run the server on")
                    .takes_value(true),
            ),
            SubCommand::with_name("list")
                .about("List language databases")
                .arg(
                    Arg::with_name("installed")
                        .short("i")
                        .long("installed")
                        .help("List only installed databases"),
                ),
            SubCommand::with_name("passwd").about("Set admin password for remote management"),
        ])
        .setting(AppSettings::SubcommandRequiredElseHelp)
        .get_matches();

    let mut db = WordDb::new("test.db");

    // Word types recognized for Polish, built from a literal array
    // instead of 22 repeated String::from calls.
    let types: Vec<String> = [
        "adj", "noun", "verb", "character", "suffix", "prefix", "conj",
        "adv", "infix", "name", "phrase", "prep_phrase", "intj", "det",
        "prep", "proverb", "abbrev", "num", "pron", "punct", "interfix",
        "particle",
    ]
    .iter()
    .map(|s| String::from(*s))
    .collect();
    let lang = Language::new("polish", types);

    match matches.subcommand() {
        // NOTE(review): the required LANG argument and the run
        // subcommand's --port option are currently ignored — only the
        // hard-coded Polish language and default port are used.
        // TODO: wire them up.
        ("upgrade", _) => { db.upgrade_lang(&lang).await; },
        ("run", _) => {
            // Listen on all interfaces, not just localhost.
            let figment = rocket::Config::figment()
                .merge(("address", "0.0.0.0"));
            rocket::custom(figment)
                .manage(db)
                .mount("/static", FileServer::from("static/"))
                .mount("/", routes![routes::get_word,
                                    routes::get_word_like,
                                    routes::frontend])
                .launch()
                .await.unwrap();
        },
        _ => {}
    }
}

59
src/routes.rs Normal file
View File

@@ -0,0 +1,59 @@
use std::fs;
use rocket::get;
use rocket::State;
use rocket::http::Status;
use rocket::response::{content, status};
use rocket::serde::json::Json;
use rusqlite::params;
use crate::database::WordDb;
/// Serves the static frontend page, or 404s when the file is missing.
#[get("/frontend")]
pub fn frontend() -> Option<content::Html<String>> {
    fs::read_to_string("static/index.html")
        .ok()
        .map(content::Html)
}
/// Looks up one word in the given language's table.
///
/// Returns `None` (404) when the word is not found, the language table
/// does not exist, or `lang` contains unsafe characters.
#[get("/langs/<lang>/words/<word>")]
pub fn get_word(db: &State<WordDb>, lang: &str, word: &str) -> Option<content::Json<String>> {
    // `lang` is user-controlled and is interpolated into the table name
    // (SQL identifiers cannot be bound as parameters), so restrict it to
    // a safe character set to block SQL injection.
    if !lang.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') {
        return None;
    }
    let connection = db.connect();
    let word = connection.query_row(&format!(
        "SELECT content FROM {}_words WHERE word = ?",
        lang),
        [word],
        |row| row.get(0)
    );
    // Any error (missing word, missing table) maps to a 404.
    word.ok().map(content::Json)
}
/// Returns up to `limit` words containing `like` (shortest first),
/// skipping `offset` rows — used for autocomplete-style search.
///
/// # Panics
/// Panics if the language table does not exist (prepare/query unwrap).
#[get("/langs/<lang>/words?<like>&<limit>&<offset>")]
pub fn get_word_like(db: &State<WordDb>, lang: &str, like: &str, limit: usize, offset: usize) -> Json<Vec<String>> {
    // `lang` is user-controlled and interpolated into the table name;
    // reject anything outside a safe identifier charset to block SQL
    // injection, returning an empty result set.
    if !lang.chars().all(|c| c.is_ascii_alphanumeric() || c == '_') {
        return Json(Vec::new());
    }
    let connection = db.connect();
    let mut statement = connection.prepare(&format!(
        "SELECT word
        FROM {}_words
        WHERE word LIKE ?
        ORDER BY length(word) ASC
        LIMIT ?
        OFFSET ?",
        lang)
    ).unwrap();
    // Substring match: %like%.
    let mut rows = statement.query(params![format!("%{}%", like), limit, offset]).unwrap();
    let mut words = Vec::new();
    while let Some(row) = rows.next().unwrap() {
        words.push(row.get(0).unwrap());
    }
    Json(words)
}