use std::fs; use std::io::ErrorKind; use std::process::exit; use reqwest; use rusqlite::{Connection, Transaction, ErrorCode}; use rusqlite::Error::SqliteFailure; use rusqlite::params; use serde_json::Value; use serde_json::json; use serde_json; use crate::language::Language; use crate::entry::{WiktionaryEntries, WiktionaryEntry}; use crate::entry::Form; const DB_DIR: &str = "/usr/share/inflectived/"; const CACHE_DIR: &str = "/var/cache/"; /// A database of Wiktionary entries pub struct WordDb { db_path: String } impl WordDb { pub fn new(db_name: &str) -> Self { let mut db_path = String::from(DB_DIR); db_path.push_str(db_name); Self { db_path } } pub fn connect(&self) -> Connection { Connection::open(&self.db_path).unwrap() } pub fn clean_tables(&mut self, lang: &Language) { let mut conn = self.connect(); let transaction = conn.transaction().unwrap(); if let Err(e) = transaction.execute(&format!("DROP TABLE IF EXISTS {0}_words", &lang.code), []) { match e { SqliteFailure(f, _) => match f.code { ErrorCode::ReadOnly => { eprintln!("Could not write to database: Permission denied"); eprintln!("Please run as root"); exit(1); }, _ => panic!("{}", e) }, _ => panic!("{}", e) } } transaction.execute(&format!("DROP TABLE IF EXISTS {0}_types", &lang.code), []).unwrap(); transaction.execute(&format!(" CREATE TABLE {0}_types ( id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, name TINYTEXT UNIQUE NOT NULL )", &lang.code), []).unwrap(); for type_ in &lang.types { transaction.execute(&format!(" INSERT INTO {0}_types ( name ) VALUES ( ? )", &lang.code), [type_]).unwrap(); } transaction.execute(&format!(" CREATE TABLE {0}_words ( id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, word TINYTEXT NOT NULL, type_id INTEGER NOT NULL, content MEDIUMTEXT NOT NULL, FOREIGN KEY (type_id) REFERENCES {0}_types (id) )", &lang.code), []).unwrap(); transaction.execute(&format!(" CREATE INDEX word_index ON {0}_words (word) ", &lang.code), []).unwrap(); transaction.commit().unwrap(); } pub fn insert_entry(&self, transaction: &Transaction, lang: &Language, entry: &WiktionaryEntry) { transaction.execute(&format!(" INSERT INTO {0}_words ( word, content, type_id ) VALUES ( ?, ?, (SELECT id FROM {0}_types WHERE name = ?) )", &lang.code), params![entry.word, entry.parsed_json.to_string(), entry.type_] ).unwrap(); } pub fn insert_entries(&mut self, lang: &Language, entries: &WiktionaryEntries) { let mut conn = self.connect(); let transaction = conn.transaction().unwrap(); for entry in entries.iter() { self.insert_entry(&transaction, lang, entry); } transaction.commit().unwrap(); } /// Generate missing "form-of" entries pub fn generate_entries(&mut self, lang: &Language, entries: &WiktionaryEntries) { let mut conn = self.connect(); let transaction = conn.transaction().unwrap(); let mut statement = transaction.prepare(&format!( "SELECT {0}_words.content FROM {0}_words JOIN {0}_types ON {0}_types.id = {0}_words.type_id WHERE {0}_words.word = ? AND {0}_types.name = ?", &lang.code) ).unwrap(); for entry in entries.iter() { if let Some(forms) = entry.parsed_json["forms"].as_array() { let mut forms_vec: Vec
= Vec::new(); for form in forms { let form: Form = serde_json::from_value(form.clone()).unwrap(); forms_vec.push(form); } forms_vec = forms_vec.into_iter() .filter(|x| match &x.source { Some(src) => src == "Declension" || src == "Conjugation", None => false } ).collect(); forms_vec.sort_by_key(|x| x.form.clone()); let forms_group = forms_vec.group_by(|a, b| a.form == b.form); for forms in forms_group.into_iter() { let mut entries = statement.query([&forms[0].form, &entry.type_]).unwrap(); if let None = entries.next().unwrap() { let mut senses: Vec = Vec::new(); for form in forms { let mut tags = form.tags.clone(); tags.push(String::from("form-of")); tags.push(String::from("auto-generated")); senses.push(json!({ "form_of": [ { "word": entry.word } ], "glosses": [ form.tags.join(" ") ], "tags": tags })); } let entry_json = json!({ "pos": entry.type_.clone(), "word": forms[0].form.clone(), "senses": senses }); let new_entry = WiktionaryEntry::new(forms[0].form.clone(), entry.type_.clone(), entry_json); self.insert_entry(&transaction, lang, &new_entry); } } } } drop(statement); transaction.commit().unwrap(); } fn try_create_dir(&self, dir: &str) { match fs::create_dir(dir) { Err(e) => match e.kind() { ErrorKind::AlreadyExists => {}, _ => panic!("{}", e) }, _ => {} } } pub async fn upgrade_lang(&mut self, lang: &Language) { self.try_create_dir(DB_DIR); println!("Trying to read cached data..."); let mut cache_file = String::from(CACHE_DIR); cache_file.push_str("Polish.json"); let cached_data = fs::read_to_string(&cache_file); let mut request = None; if let Err(_) = cached_data { request = Some(reqwest::get("https://kaikki.org/dictionary/Polish/kaikki.org-dictionary-Polish.json")); } println!("Cleaning tables..."); self.clean_tables(lang); let data; if let Some(request) = request { // Actually, the request was sent before println!("Requesting data..."); data = request.await.unwrap().text().await.unwrap(); if cfg!(unix) { println!("Caching data..."); self.try_create_dir(CACHE_DIR); fs::write(&cache_file, &data).unwrap(); } } else { data = cached_data.unwrap(); } println!("Parsing data..."); let entries = WiktionaryEntries::parse_data(data); println!("Inserting data..."); self.insert_entries(lang, &entries); println!("Generating \"form-of\" entries..."); self.generate_entries(lang, &entries); println!("Done"); } }