use std::fs; use reqwest; use rusqlite::{Connection, Transaction}; use rusqlite::params; use serde_json::Value; use serde_json::json; use serde_json; use crate::language::Language; use crate::entry::{WiktionaryEntries, WiktionaryEntry}; use crate::entry::Form; /// A database of Wiktionary entries pub struct WordDb { connection: String } impl WordDb { pub fn new(db_path: &str) -> Self { Self { connection: String::from(db_path) } } pub fn connect(&self) -> Connection { Connection::open(&self.connection).unwrap() } pub fn clean_tables(&mut self, lang: &Language) { let mut conn = self.connect(); let transaction = conn.transaction().unwrap(); transaction.execute(&format!("DROP TABLE IF EXISTS {0}_words", &lang.code), []).unwrap(); transaction.execute(&format!("DROP TABLE IF EXISTS {0}_types", &lang.code), []).unwrap(); transaction.execute(&format!(" CREATE TABLE {0}_types ( id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, name TINYTEXT UNIQUE NOT NULL )", &lang.code), []).unwrap(); for type_ in &lang.types { transaction.execute(&format!(" INSERT INTO {0}_types ( name ) VALUES ( ? )", &lang.code), [type_]).unwrap(); } transaction.execute(&format!(" CREATE TABLE {0}_words ( id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, word TINYTEXT NOT NULL, type_id INTEGER NOT NULL, content MEDIUMTEXT NOT NULL, FOREIGN KEY (type_id) REFERENCES {0}_types (id) )", &lang.code), []).unwrap(); transaction.execute(&format!(" CREATE INDEX word_index ON {0}_words (word) ", &lang.code), []).unwrap(); transaction.commit().unwrap(); } pub fn insert_entry(&self, transaction: &Transaction, lang: &Language, entry: &WiktionaryEntry) { transaction.execute(&format!(" INSERT INTO {0}_words ( word, content, type_id ) VALUES ( ?, ?, (SELECT id FROM {0}_types WHERE name = ?) )", &lang.code), params![entry.word, entry.parsed_json.to_string(), entry.type_] ).unwrap(); } pub fn insert_entries(&mut self, lang: &Language, entries: &WiktionaryEntries) { let mut conn = self.connect(); let transaction = conn.transaction().unwrap(); for entry in entries.iter() { self.insert_entry(&transaction, lang, entry); } transaction.commit().unwrap(); } /// Generate missing "form-of" entries pub fn generate_entries(&mut self, lang: &Language, entries: &WiktionaryEntries) { let mut conn = self.connect(); let transaction = conn.transaction().unwrap(); let mut statement = transaction.prepare(&format!( "SELECT {0}_words.content FROM {0}_words JOIN {0}_types ON {0}_types.id = {0}_words.type_id WHERE {0}_words.word = ? AND {0}_types.name = ?", &lang.code) ).unwrap(); for entry in entries.iter() { if let Some(forms) = entry.parsed_json["forms"].as_array() { let mut forms_vec: Vec
= Vec::new(); for form in forms { let form: Form = serde_json::from_value(form.clone()).unwrap(); forms_vec.push(form); } forms_vec = forms_vec.into_iter() .filter(|x| match &x.source { Some(src) => src == "Declension" || src == "Conjugation", None => false } ).collect(); forms_vec.sort_by_key(|x| x.form.clone()); let forms_group = forms_vec.group_by(|a, b| a.form == b.form); for forms in forms_group.into_iter() { let mut entries = statement.query([&forms[0].form, &entry.type_]).unwrap(); if let None = entries.next().unwrap() { let mut senses: Vec = Vec::new(); for form in forms { let mut tags = form.tags.clone(); tags.push(String::from("form-of")); tags.push(String::from("auto-generated")); senses.push(json!({ "form_of": [ { "word": entry.word } ], "glosses": [ form.tags.join(" ") ], "tags": tags })); } let entry_json = json!({ "pos": entry.type_.clone(), "word": forms[0].form.clone(), "senses": senses }); let new_entry = WiktionaryEntry::new(forms[0].form.clone(), entry.type_.clone(), entry_json); self.insert_entry(&transaction, lang, &new_entry); } } } } drop(statement); transaction.commit().unwrap(); } pub async fn upgrade_lang(&mut self, lang: &Language) { println!("Trying to read cached data..."); let cached_data = fs::read_to_string("Polish.json"); let mut request = None; if let Err(_) = cached_data { request = Some(reqwest::get("https://kaikki.org/dictionary/Polish/kaikki.org-dictionary-Polish.json")); } println!("Cleaning tables..."); self.clean_tables(lang); let data; if let Some(request) = request { // Actually, the request was sent before println!("Requesting data..."); data = request.await.unwrap().text().await.unwrap(); println!("Caching data..."); fs::write("Polish.json", &data).unwrap(); } else { data = cached_data.unwrap(); } println!("Parsing data..."); let entries = WiktionaryEntries::parse_data(data); println!("Inserting data..."); self.insert_entries(lang, &entries); println!("Generating \"form-of\" entries..."); self.generate_entries(lang, &entries); println!("Done"); } }