Initial commit
This commit is contained in:
113
src/database.rs
Normal file
113
src/database.rs
Normal file
@@ -0,0 +1,113 @@
|
||||
use std::fs;
|
||||
|
||||
use reqwest;
|
||||
use rusqlite::Connection;
|
||||
use rusqlite::params;
|
||||
|
||||
use crate::language::Language;
|
||||
use crate::entry::WiktionaryEntries;
|
||||
|
||||
/// A database of Wiktionary entries, addressed by the path of its
/// backing SQLite file.
pub struct WordDb {
    // Path to the SQLite database file; a fresh `Connection` is opened
    // from it on every call to `connect()`.
    connection: String
}
|
||||
|
||||
impl WordDb {
|
||||
pub fn new(db_path: &str) -> Self {
|
||||
Self {
|
||||
connection: String::from(db_path)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn connect(&self) -> Connection {
|
||||
Connection::open(&self.connection).unwrap()
|
||||
}
|
||||
|
||||
pub fn clean_tables(&mut self, lang: &Language) {
|
||||
let mut connection = self.connect();
|
||||
let transaction = connection.transaction().unwrap();
|
||||
|
||||
transaction.execute(&format!("DROP TABLE IF EXISTS {}_words", &lang.code), []).unwrap();
|
||||
transaction.execute(&format!("DROP TABLE IF EXISTS {}_types", &lang.code), []).unwrap();
|
||||
|
||||
transaction.execute(&format!("
|
||||
CREATE TABLE {}_types (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
|
||||
name TINYTEXT UNIQUE NOT NULL
|
||||
)", &lang.code), []).unwrap();
|
||||
|
||||
for type_ in &lang.types {
|
||||
transaction.execute(&format!("
|
||||
INSERT INTO {}_types ( name )
|
||||
VALUES (
|
||||
?
|
||||
)", &lang.code), [type_]).unwrap();
|
||||
}
|
||||
|
||||
transaction.execute(&format!("
|
||||
CREATE TABLE {}_words (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
|
||||
word TINYTEXT UNIQUE NOT NULL,
|
||||
type_id INTEGER NOT NULL,
|
||||
content MEDIUMTEXT NOT NULL,
|
||||
FOREIGN KEY (type_id)
|
||||
REFERENCES {}_types (id)
|
||||
)", &lang.code, &lang.code), []).unwrap();
|
||||
|
||||
transaction.commit().unwrap();
|
||||
}
|
||||
|
||||
pub fn insert_entries(&mut self, lang: &Language, entries: WiktionaryEntries) {
|
||||
let mut connection = self.connect();
|
||||
let transaction = connection.transaction().unwrap();
|
||||
|
||||
for entry in entries {
|
||||
transaction.execute(&format!("
|
||||
INSERT INTO {}_words ( word, content, type_id )
|
||||
VALUES (
|
||||
?, ?,
|
||||
(SELECT id FROM {}_types WHERE name = ?)
|
||||
)", &lang.code, &lang.code),
|
||||
params![entry.word,
|
||||
entry.parsed_json.to_string(),
|
||||
entry.type_]
|
||||
).unwrap();
|
||||
}
|
||||
|
||||
transaction.commit().unwrap();
|
||||
}
|
||||
|
||||
pub async fn upgrade_lang(&mut self, lang: &Language) {
|
||||
println!("Trying to read cached data...");
|
||||
let cached_data = fs::read_to_string("Polish.json");
|
||||
let mut request = None;
|
||||
|
||||
if let Err(_) = cached_data {
|
||||
request = Some(reqwest::get("https://kaikki.org/dictionary/Polish/kaikki.org-dictionary-Polish.json"));
|
||||
}
|
||||
|
||||
println!("Cleaning tables...");
|
||||
self.clean_tables(lang);
|
||||
|
||||
let data;
|
||||
if let Some(request) = request {
|
||||
// Actually, the request was sent before
|
||||
println!("Requesting data...");
|
||||
data = request.await.unwrap().text().await.unwrap();
|
||||
println!("Caching data...");
|
||||
fs::write("Polish.json", &data).unwrap();
|
||||
}
|
||||
else {
|
||||
data = cached_data.unwrap();
|
||||
}
|
||||
|
||||
println!("Parsing data...");
|
||||
let entries = WiktionaryEntries::parse_data(data)
|
||||
.merge_duplicates();
|
||||
|
||||
println!("Inserting data...");
|
||||
self.insert_entries(lang, entries);
|
||||
|
||||
println!("Done");
|
||||
}
|
||||
}
|
136
src/entry.rs
Normal file
136
src/entry.rs
Normal file
@@ -0,0 +1,136 @@
|
||||
use std::cmp;
|
||||
use std::iter::IntoIterator;
|
||||
use json::JsonValue::{Object, Short, Array};
|
||||
use json::JsonValue;
|
||||
|
||||
#[derive (Clone)]
|
||||
#[derive (Debug)]
|
||||
pub struct WiktionaryEntry {
|
||||
pub word: String,
|
||||
pub type_: String,
|
||||
pub parsed_json: JsonValue
|
||||
}
|
||||
|
||||
impl cmp::PartialEq for WiktionaryEntry {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.word.eq(&other.word)
|
||||
}
|
||||
}
|
||||
|
||||
// Word-based equality is reflexive and total, so `Eq` holds.
impl cmp::Eq for WiktionaryEntry {}
|
||||
|
||||
/// Delegates to the total order defined by `Ord` (alphabetical by word).
impl cmp::PartialOrd for WiktionaryEntry {
    fn partial_cmp(&self, other: &Self) -> Option<cmp::Ordering> {
        Some(self.cmp(other))
    }
}
|
||||
|
||||
/// Entries are ordered by word, consistent with `PartialEq`: sorting
/// places duplicate words next to each other for merging.
impl cmp::Ord for WiktionaryEntry {
    fn cmp(&self, other: &Self) -> cmp::Ordering {
        self.word.cmp(&other.word)
    }
}
|
||||
|
||||
impl WiktionaryEntry {
|
||||
fn merge(first: Self, second: Self) -> Self {
|
||||
let output_parsed: JsonValue = match first.parsed_json {
|
||||
Array(mut objs) => {
|
||||
objs.push(second.parsed_json);
|
||||
JsonValue::Array(objs)
|
||||
},
|
||||
Object(_) => {
|
||||
let mut objs: Vec<JsonValue> = Vec::new();
|
||||
objs.push(first.parsed_json);
|
||||
objs.push(second.parsed_json);
|
||||
JsonValue::Array(objs)
|
||||
},
|
||||
_ => panic!("Expected array or object, found {}", first.parsed_json.pretty(8))
|
||||
};
|
||||
|
||||
Self {
|
||||
word: first.word,
|
||||
type_: first.type_,
|
||||
parsed_json: output_parsed
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse(unparsed_json: &str) -> Self {
|
||||
let json = json::parse(unparsed_json).unwrap();
|
||||
|
||||
let (word, type_) = match &json {
|
||||
Object(o) => (
|
||||
match o.get("word") {
|
||||
Some(w) => match w {
|
||||
Short(s) => s.to_string(),
|
||||
JsonValue::String(s) => s.clone(),
|
||||
_ => panic!("Not a string: {}", w.pretty(8))
|
||||
},
|
||||
None => panic!("No field 'word': {}", o.pretty(8))
|
||||
},
|
||||
match o.get("pos") {
|
||||
Some(w) => match w {
|
||||
Short(s) => s.to_string(),
|
||||
JsonValue::String(s) => s.clone(),
|
||||
_ => panic!("Not a string: {}", w.pretty(8))
|
||||
},
|
||||
None => panic!("No field 'pos': {}", o.pretty(8))
|
||||
}
|
||||
),
|
||||
_ => panic!("Not an object: {}", json.pretty(8))
|
||||
};
|
||||
|
||||
Self {
|
||||
word,
|
||||
type_,
|
||||
parsed_json: json
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An owned collection of Wiktionary entries (newtype over `Vec`).
pub struct WiktionaryEntries(Vec<WiktionaryEntry>);
|
||||
|
||||
impl WiktionaryEntries {
|
||||
pub fn parse_data(data: String) -> Self {
|
||||
let mut entries: Vec<WiktionaryEntry> = Vec::new();
|
||||
|
||||
for line in data.lines() {
|
||||
entries.push(WiktionaryEntry::parse(line));
|
||||
}
|
||||
|
||||
Self(entries)
|
||||
}
|
||||
|
||||
pub fn merge_duplicates(mut self) -> Self {
|
||||
self.0.sort();
|
||||
|
||||
let mut entries = self.0.into_iter();
|
||||
let mut last_entry: WiktionaryEntry = entries.next().unwrap();
|
||||
|
||||
let mut new_entries = Vec::new();
|
||||
|
||||
for entry in entries {
|
||||
if last_entry == entry {
|
||||
last_entry = WiktionaryEntry::merge(last_entry, entry);
|
||||
}
|
||||
else {
|
||||
new_entries.push(last_entry);
|
||||
last_entry = entry;
|
||||
}
|
||||
}
|
||||
|
||||
new_entries.push(last_entry);
|
||||
|
||||
self.0 = new_entries;
|
||||
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Consuming iteration over the owned entries, in their current order.
impl IntoIterator for WiktionaryEntries {
    type Item = WiktionaryEntry;
    type IntoIter = std::vec::IntoIter<Self::Item>;

    fn into_iter(self) -> Self::IntoIter {
        self.0.into_iter()
    }
}
|
14
src/language.rs
Normal file
14
src/language.rs
Normal file
@@ -0,0 +1,14 @@
|
||||
/// A language supported by the word database.
#[derive(Debug)]
pub struct Language {
    /// Language code used to prefix the language's SQL tables
    /// (e.g. "polish" -> `polish_words`, `polish_types`).
    pub code: String,
    /// The part-of-speech type names this language's entries can have.
    pub types: Vec<String>
}
|
||||
|
||||
impl Language {
|
||||
pub fn new(code: &str, types: Vec<String>) -> Self {
|
||||
Self {
|
||||
code: String::from(code),
|
||||
types
|
||||
}
|
||||
}
|
||||
}
|
94
src/main.rs
Normal file
94
src/main.rs
Normal file
@@ -0,0 +1,94 @@
|
||||
//mod database;
|
||||
use rocket::routes;
|
||||
use rocket::fs::FileServer;
|
||||
use rocket::data::{Limits, ToByteUnit};
|
||||
use clap::{App, AppSettings, Arg, SubCommand};
|
||||
//use database::WordDb;
|
||||
mod database;
|
||||
mod language;
|
||||
mod entry;
|
||||
mod routes;
|
||||
|
||||
use database::WordDb;
|
||||
use language::Language;
|
||||
|
||||
#[rocket::main]
|
||||
async fn main() {
|
||||
let matches = App::new("inflectived")
|
||||
.version("0.1")
|
||||
.author("Augusto Gunsch <augustogunsch@tutanota.com>")
|
||||
.about("inflective daemon")
|
||||
.subcommands(vec![
|
||||
SubCommand::with_name("upgrade")
|
||||
.about("Upgrade or install a language database")
|
||||
.arg(
|
||||
Arg::with_name("LANG")
|
||||
.required(true)
|
||||
.index(1)
|
||||
.help("Language database to upgrade"),
|
||||
),
|
||||
SubCommand::with_name("run").about("Run the daemon").arg(
|
||||
Arg::with_name("port")
|
||||
.short("p")
|
||||
.long("port")
|
||||
.value_name("PORT")
|
||||
.help("Port to run the server on")
|
||||
.takes_value(true),
|
||||
),
|
||||
SubCommand::with_name("list")
|
||||
.about("List language databases")
|
||||
.arg(
|
||||
Arg::with_name("installed")
|
||||
.short("i")
|
||||
.long("installed")
|
||||
.help("List only installed databases"),
|
||||
),
|
||||
SubCommand::with_name("passwd").about("Set admin password for remote management"),
|
||||
])
|
||||
.setting(AppSettings::SubcommandRequiredElseHelp)
|
||||
.get_matches();
|
||||
|
||||
let mut db = WordDb::new("test.db");
|
||||
|
||||
let lang = Language::new("polish",
|
||||
vec![String::from("adj"),
|
||||
String::from("noun"),
|
||||
String::from("verb"),
|
||||
String::from("character"),
|
||||
String::from("suffix"),
|
||||
String::from("prefix"),
|
||||
String::from("conj"),
|
||||
String::from("adv"),
|
||||
String::from("infix"),
|
||||
String::from("name"),
|
||||
String::from("phrase"),
|
||||
String::from("prep_phrase"),
|
||||
String::from("intj"),
|
||||
String::from("det"),
|
||||
String::from("prep"),
|
||||
String::from("proverb"),
|
||||
String::from("abbrev"),
|
||||
String::from("num"),
|
||||
String::from("pron"),
|
||||
String::from("punct"),
|
||||
String::from("interfix"),
|
||||
String::from("particle")]);
|
||||
|
||||
match matches.subcommand() {
|
||||
("upgrade", _) => { db.upgrade_lang(&lang).await; },
|
||||
("run", _) => {
|
||||
let figment = rocket::Config::figment()
|
||||
.merge(("address", "0.0.0.0"));
|
||||
|
||||
rocket::custom(figment)
|
||||
.manage(db)
|
||||
.mount("/static", FileServer::from("static/"))
|
||||
.mount("/", routes![routes::get_word,
|
||||
routes::get_word_like,
|
||||
routes::frontend])
|
||||
.launch()
|
||||
.await.unwrap();
|
||||
},
|
||||
_ => {}
|
||||
}
|
||||
}
|
59
src/routes.rs
Normal file
59
src/routes.rs
Normal file
@@ -0,0 +1,59 @@
|
||||
use std::fs;
|
||||
|
||||
use rocket::get;
|
||||
use rocket::State;
|
||||
use rocket::http::Status;
|
||||
use rocket::response::{content, status};
|
||||
use rocket::serde::json::Json;
|
||||
use rusqlite::params;
|
||||
|
||||
use crate::database::WordDb;
|
||||
|
||||
#[get("/frontend")]
|
||||
pub fn frontend() -> Option<content::Html<String>> {
|
||||
match fs::read_to_string("static/index.html") {
|
||||
Ok(file) => Some(content::Html(file)),
|
||||
Err(_) => None
|
||||
}
|
||||
}
|
||||
|
||||
#[get("/langs/<lang>/words/<word>")]
|
||||
pub fn get_word(db: &State<WordDb>, lang: &str, word: &str) -> Option<content::Json<String>> {
|
||||
let connection = db.connect();
|
||||
|
||||
let word = connection.query_row(&format!(
|
||||
"SELECT content FROM {}_words WHERE word = ?",
|
||||
lang),
|
||||
[word],
|
||||
|row| row.get(0)
|
||||
);
|
||||
|
||||
match word {
|
||||
Ok(w) => Some(content::Json(w)),
|
||||
Err(_) => None
|
||||
}
|
||||
}
|
||||
|
||||
#[get("/langs/<lang>/words?<like>&<limit>&<offset>")]
|
||||
pub fn get_word_like(db: &State<WordDb>, lang: &str, like: &str, limit: usize, offset: usize) -> Json<Vec<String>> {
|
||||
let connection = db.connect();
|
||||
|
||||
let mut statement = connection.prepare(&format!(
|
||||
"SELECT word
|
||||
FROM {}_words
|
||||
WHERE word LIKE ?
|
||||
ORDER BY length(word) ASC
|
||||
LIMIT ?
|
||||
OFFSET ?",
|
||||
lang)
|
||||
).unwrap();
|
||||
|
||||
let mut rows = statement.query(params![format!("%{}%", like), limit, offset]).unwrap();
|
||||
|
||||
let mut words = Vec::new();
|
||||
while let Some(row) = rows.next().unwrap() {
|
||||
words.push(row.get(0).unwrap());
|
||||
}
|
||||
|
||||
Json(words)
|
||||
}
|
Reference in New Issue
Block a user