SwissText
Getting Started
Installation
Installation on a Linux server
Installation on MacOS
Usage
Running MongoDB
Running the frontend
Running the backend
Configuration options
Current "best" pipeline configuration
Using default tools
Using extra tools
Tool dependencies
API
API Overview
swisstext.cmd
Configuration files
Link utilities
Dealing with links in a page
Dealing with search results
swisstext.cmd.scraping
st_scrape
dump_config
from_file
from_mongo
gen_seeds
Tool interfaces
Tool implementations
Deciders
Seed creators
Crawlers
Normalizers
Splitters
Sentence Filters
Link Filters
Language Detectors
Savers
Pipeline implementation
Configuration
Data structures
Queue
Pipeline
swisstext.cmd.searching
st_search
dump_config
from_file
from_mongo
Tool interfaces
Tool implementations
Savers
Searchers
Pipeline implementation
Configuration
Data structures
Search engine
swisstext.mongo
Installation
Collections
About the code
Abstract Definitions
Common structures and embedded documents
Seed collection
Sentences collection
URLs and Blacklist collections
Users collection
MongoEngine-ready classes
swisstext.alswiki
st_alswiki
download
parse
process
txt
Processing Alswiki dumps
Processing text files
TODOs
Other
Ideas
Sentence Filtering
URL Filtering
About this documentation
Generation
Deploying on github-pages
SwissText
Docs
»
Index
Edit on GitHub
Index
Symbols
|
_
|
A
|
B
|
C
|
D
|
E
|
F
|
G
|
H
|
I
|
J
|
K
|
L
|
M
|
N
|
O
|
P
|
Q
|
R
|
S
|
T
|
U
|
V
Symbols
--how <how>
st_scrape-from_mongo command line option
--new, --any
st_scrape-gen_seeds command line option
st_search-from_mongo command line option
--no-search
st_search-from_file command line option
--what <what>
st_scrape-from_mongo command line option
-c, --config-path <config_path>
st_alswiki-process command line option
st_alswiki-txt command line option
st_scrape command line option
st_search command line option
-c, --confirm
st_scrape-gen_seeds command line option
-d, --db <db>
st_alswiki-process command line option
st_scrape command line option
st_search command line option
-d, --dir <dir>
st_alswiki-download command line option
-f, --format <format>
st_alswiki-txt command line option
-l, --log-level <log_level>
st_alswiki command line option
st_scrape command line option
st_search command line option
-m, --min-chars <min_chars>
st_alswiki-parse command line option
-n, --num <num>
st_scrape-gen_seeds command line option
-n, --num-seeds <num_seeds>
st_search-from_mongo command line option
-n, --num-urls <num_urls>
st_scrape-from_mongo command line option
-p, --min-proba <min_proba>
st_alswiki-txt command line option
-s, --num-sentences <num_sentences>
st_scrape-gen_seeds command line option
-t, --test
st_scrape-dump_config command line option
st_search-dump_config command line option
_
__init__() (swisstext.cmd.base_config.BaseConfig method)
(swisstext.cmd.scraping.config.Config method)
(swisstext.cmd.scraping.config.Config.Options method)
(swisstext.cmd.scraping.data.Page method)
(swisstext.cmd.scraping.data.PageScore method)
(swisstext.cmd.scraping.data.Sentence method)
(swisstext.cmd.scraping.interfaces.ICrawler.CrawlError method)
(swisstext.cmd.scraping.interfaces.ICrawler.CrawlResults method)
(swisstext.cmd.scraping.interfaces.ISaver method)
(swisstext.cmd.scraping.pipeline.Pipeline method)
(swisstext.cmd.scraping.pipeline.PipelineWorker method)
(swisstext.cmd.scraping.tools.basic_decider.BasicDecider method)
(swisstext.cmd.scraping.tools.basic_seed_creator.BasicSeedCreator method)
(swisstext.cmd.scraping.tools.basic_seed_creator.IdfSeedCreator method)
(swisstext.cmd.scraping.tools.bs_crawler.BsCrawler method)
(swisstext.cmd.scraping.tools.console_saver.ConsoleSaver method)
(swisstext.cmd.scraping.tools.justext_crawler.JustextCrawler method)
(swisstext.cmd.scraping.tools.mocy_splitter.MocySplitter method)
(swisstext.cmd.scraping.tools.mongo_saver.MongoSaver method)
(swisstext.cmd.scraping.tools.moses_splitter.MosesSplitter method)
(swisstext.cmd.scraping.tools.norm_punc.Normalizer method)
(swisstext.cmd.scraping.tools.pattern_sentence_filter.PatternSentenceFilter method)
(swisstext.cmd.scraping.tools.punkt_splitter.PunktSplitter method)
(swisstext.cmd.scraping.tools.swigspot_langid.SwigspotLangid method)
(swisstext.cmd.searching.config.Config method)
(swisstext.cmd.searching.config.Config.Options method)
(swisstext.cmd.searching.data.Seed method)
(swisstext.cmd.searching.pipeline.SearchEngine method)
(swisstext.cmd.searching.tools.console_saver.ConsoleSaver method)
(swisstext.cmd.searching.tools.google_search.GoogleGenerator method)
(swisstext.cmd.searching.tools.google_search.GoogleGeneratorFactory method)
(swisstext.cmd.searching.tools.mongo_saver.MongoSaver method)
(swisstext.cmd.searching.tools.start_page.StartPageGenerator method)
(swisstext.cmd.searching.tools.start_page.StartPageGeneratorFactory method)
A
AbstractMongoBlacklist (class in swisstext.mongo.abstract.urls)
AbstractMongoSeed (class in swisstext.mongo.abstract.seeds)
AbstractMongoSentence (class in swisstext.mongo.abstract.sentences)
AbstractMongoURL (class in swisstext.mongo.abstract.urls)
AbstractMongoUser (class in swisstext.mongo.abstract.users)
add_crawl_history() (swisstext.mongo.abstract.urls.AbstractMongoURL method)
add_label() (swisstext.mongo.abstract.sentences.AbstractMongoSentence class method)
(swisstext.mongo.abstract.sentences.DialectInfo method)
add_search_history() (swisstext.mongo.abstract.seeds.AbstractMongoSeed method)
add_url() (swisstext.mongo.abstract.urls.AbstractMongoBlacklist class method)
ADMIN (swisstext.mongo.abstract.users.UserRoles attribute)
AUTO (swisstext.mongo.abstract.generic.SourceType attribute)
B
BASE_URL (in module swisstext.cmd.searching.tools.google_search)
BaseConfig (class in swisstext.cmd.base_config)
BasicDecider (class in swisstext.cmd.scraping.tools.basic_decider)
BasicSeedCreator (class in swisstext.cmd.scraping.tools.basic_seed_creator)
blacklist_url() (swisstext.cmd.scraping.interfaces.ISaver method)
(swisstext.cmd.scraping.tools.console_saver.ConsoleSaver method)
(swisstext.cmd.scraping.tools.mongo_saver.MongoSaver method)
blacklisted (swisstext.cmd.scraping.data.Page attribute)
BLACKLISTED (swisstext.cmd.searching.interfaces.ISaver.LinkStatus attribute)
BsCrawler (class in swisstext.cmd.scraping.tools.bs_crawler)
by (swisstext.mongo.abstract.generic.Deleted attribute)
C
check_link() (swisstext.cmd.searching.pipeline.SearchEngine method)
cleanup_spaces() (swisstext.cmd.scraping.tools.mocy_splitter.MocySplitter class method)
(swisstext.cmd.scraping.tools.moses_splitter.MosesSplitter class method)
CleverBsCrawler (class in swisstext.cmd.scraping.tools.bs_crawler)
close() (swisstext.cmd.scraping.tools.console_saver.ConsoleSaver method)
comment (swisstext.mongo.abstract.generic.Deleted attribute)
confidence (swisstext.mongo.abstract.sentences.DialectInfo attribute)
Config (class in swisstext.cmd.scraping.config)
(class in swisstext.cmd.searching.config)
Config.Options (class in swisstext.cmd.scraping.config)
(class in swisstext.cmd.searching.config)
ConsoleSaver (class in swisstext.cmd.scraping.tools.console_saver)
(class in swisstext.cmd.searching.tools.console_saver)
count (swisstext.cmd.scraping.data.PageScore attribute)
(swisstext.mongo.abstract.generic.CrawlMeta attribute)
(swisstext.mongo.abstract.seeds.AbstractMongoSeed attribute)
(swisstext.mongo.abstract.sentences.DialectInfo attribute)
(swisstext.mongo.abstract.urls.AbstractMongoURL attribute)
crawl() (swisstext.cmd.scraping.interfaces.ICrawler method)
(swisstext.cmd.scraping.tools.bs_crawler.BsCrawler method)
(swisstext.cmd.scraping.tools.justext_crawler.JustextCrawler method)
crawl_depth (swisstext.cmd.scraping.config.Config.Options attribute)
crawl_history (swisstext.mongo.abstract.urls.AbstractMongoURL attribute)
crawl_proba (swisstext.mongo.abstract.sentences.AbstractMongoSentence attribute)
crawl_results (swisstext.cmd.scraping.data.Page attribute)
CrawlMeta (class in swisstext.mongo.abstract.generic)
create() (swisstext.mongo.abstract.seeds.AbstractMongoSeed class method)
(swisstext.mongo.abstract.sentences.AbstractMongoSentence class method)
(swisstext.mongo.abstract.urls.AbstractMongoURL class method)
create_pipeline() (swisstext.cmd.scraping.config.Config method)
create_search_engine() (swisstext.cmd.searching.config.Config method)
ctx (swisstext.cmd.searching.tools.google_search.GoogleGenerator attribute)
D
date (swisstext.mongo.abstract.generic.CrawlMeta attribute)
(swisstext.mongo.abstract.generic.Deleted attribute)
(swisstext.mongo.abstract.sentences.DialectEntry attribute)
date_added (swisstext.mongo.abstract.seeds.AbstractMongoSeed attribute)
(swisstext.mongo.abstract.sentences.AbstractMongoSentence attribute)
(swisstext.mongo.abstract.urls.AbstractMongoBlacklist attribute)
(swisstext.mongo.abstract.urls.AbstractMongoURL attribute)
DEFAULT_HEADERS (in module swisstext.cmd.scraping.tools.bs_crawler)
Deleted (class in swisstext.mongo.abstract.generic)
deleted (swisstext.mongo.abstract.seeds.AbstractMongoSeed attribute)
(swisstext.mongo.abstract.sentences.AbstractMongoSentence attribute)
delta (swisstext.mongo.abstract.urls.AbstractMongoURL attribute)
delta_count (swisstext.cmd.scraping.data.PageScore attribute)
delta_date (swisstext.cmd.scraping.data.PageScore attribute)
(swisstext.mongo.abstract.seeds.AbstractMongoSeed attribute)
(swisstext.mongo.abstract.urls.AbstractMongoURL attribute)
dialect (swisstext.mongo.abstract.sentences.AbstractMongoSentence attribute)
DialectEntry (class in swisstext.mongo.abstract.sentences)
DialectInfo (class in swisstext.mongo.abstract.sentences)
Dialects (in module swisstext.mongo.abstract.generic)
DUMPFILE
st_alswiki-parse command line option
dumps() (swisstext.cmd.base_config.BaseConfig method)
E
empty() (swisstext.cmd.scraping.interfaces.ICrawler.CrawlResults class method)
ERROR (swisstext.mongo.abstract.generic.SourceType attribute)
EXCLUDED_TLDS (in module swisstext.cmd.link_utils)
EXISTS (swisstext.cmd.searching.interfaces.ISaver.LinkStatus attribute)
exists() (swisstext.mongo.abstract.seeds.AbstractMongoSeed class method)
(swisstext.mongo.abstract.sentences.AbstractMongoSentence class method)
(swisstext.mongo.abstract.urls.AbstractMongoBlacklist class method)
(swisstext.mongo.abstract.urls.AbstractMongoURL class method)
extra (swisstext.mongo.abstract.generic.Source attribute)
extract_links() (swisstext.cmd.scraping.tools.bs_crawler.BsCrawler class method)
extract_text_blocks() (swisstext.cmd.scraping.tools.bs_crawler.BsCrawler class method)
(swisstext.cmd.scraping.tools.bs_crawler.CleverBsCrawler method)
F
filter() (swisstext.cmd.scraping.interfaces.ISentenceFilter method)
(swisstext.cmd.scraping.interfaces.IUrlFilter method)
filter_links() (in module swisstext.cmd.link_utils)
find_similar() (swisstext.mongo.abstract.seeds.AbstractMongoSeed class method)
fix() (swisstext.cmd.scraping.interfaces.IUrlFilter method)
fix_url() (in module swisstext.cmd.link_utils)
from_ex() (swisstext.cmd.scraping.interfaces.ICrawler.CrawlError class method)
G
generate_seeds() (swisstext.cmd.scraping.interfaces.ISeedCreator method)
(swisstext.cmd.scraping.tools.basic_seed_creator.BasicSeedCreator method)
(swisstext.cmd.scraping.tools.basic_seed_creator.IdfSeedCreator method)
GENSIMFILE
st_alswiki-process command line option
get() (swisstext.cmd.base_config.BaseConfig method)
(swisstext.mongo.abstract.seeds.AbstractMongoSeed class method)
(swisstext.mongo.abstract.urls.AbstractMongoURL class method)
(swisstext.mongo.abstract.users.AbstractMongoUser class method)
get_content() (swisstext.cmd.scraping.tools.bs_crawler.BsCrawler class method)
get_hash() (swisstext.mongo.abstract.sentences.AbstractMongoSentence static method)
(swisstext.mongo.abstract.urls.AbstractMongoBlacklist static method)
(swisstext.mongo.abstract.urls.AbstractMongoURL static method)
(swisstext.mongo.abstract.users.AbstractMongoUser static method)
get_label_by() (swisstext.mongo.abstract.sentences.DialectInfo method)
get_never_crawled() (swisstext.mongo.abstract.urls.AbstractMongoURL class method)
get_page() (swisstext.cmd.scraping.interfaces.ISaver method)
(swisstext.cmd.scraping.tools.console_saver.ConsoleSaver method)
(swisstext.cmd.scraping.tools.mongo_saver.MongoSaver method)
get_soup() (swisstext.cmd.scraping.tools.bs_crawler.BsCrawler class method)
GET_TIMEOUT (in module swisstext.cmd.scraping.tools.bs_crawler)
GoogleGenerator (class in swisstext.cmd.searching.tools.google_search)
GoogleGeneratorFactory (class in swisstext.cmd.searching.tools.google_search)
H
has_next() (swisstext.cmd.searching.tools.google_search.GoogleGenerator method)
(swisstext.cmd.searching.tools.start_page.StartPageGenerator method)
hash (swisstext.mongo.abstract.generic.CrawlMeta attribute)
I
ICrawler (class in swisstext.cmd.scraping.interfaces)
ICrawler.CrawlError
ICrawler.CrawlResults (class in swisstext.cmd.scraping.interfaces)
id (swisstext.cmd.scraping.pipeline.PipelineWorker attribute)
(swisstext.mongo.abstract.seeds.AbstractMongoSeed attribute)
(swisstext.mongo.abstract.sentences.AbstractMongoSentence attribute)
(swisstext.mongo.abstract.urls.AbstractMongoBlacklist attribute)
(swisstext.mongo.abstract.urls.AbstractMongoURL attribute)
(swisstext.mongo.abstract.users.AbstractMongoUser attribute)
IDecider (class in swisstext.cmd.scraping.interfaces)
IdfSeedCreator (class in swisstext.cmd.scraping.tools.basic_seed_creator)
INCLUDED_WIKI_DOMAINS (in module swisstext.cmd.link_utils)
INormalizer (class in swisstext.cmd.scraping.interfaces)
instantiate_tools() (swisstext.cmd.base_config.BaseConfig method)
INTERFACE_WILDCARD (swisstext.cmd.base_config.BaseConfig attribute)
interfaces_package() (swisstext.cmd.base_config.BaseConfig property)
(swisstext.cmd.scraping.config.Config property)
(swisstext.cmd.searching.config.Config property)
IQueryBuilder (class in swisstext.cmd.searching.interfaces)
is_new() (swisstext.cmd.scraping.data.Page method)
is_url_blacklisted() (swisstext.cmd.scraping.interfaces.ISaver method)
(swisstext.cmd.scraping.tools.console_saver.ConsoleSaver method)
(swisstext.cmd.scraping.tools.mongo_saver.MongoSaver method)
is_valid() (swisstext.cmd.scraping.interfaces.ISentenceFilter method)
(swisstext.cmd.scraping.tools.pattern_sentence_filter.PatternSentenceFilter method)
ISaver (class in swisstext.cmd.scraping.interfaces)
(class in swisstext.cmd.searching.interfaces)
ISaver.LinkStatus (class in swisstext.cmd.searching.interfaces)
ISearcher (class in swisstext.cmd.searching.interfaces)
ISeedCreator (class in swisstext.cmd.scraping.interfaces)
ISentenceFilter (class in swisstext.cmd.scraping.interfaces)
ISgDetector (class in swisstext.cmd.scraping.interfaces)
ISplitter (class in swisstext.cmd.scraping.interfaces)
IUrlFilter (class in swisstext.cmd.scraping.interfaces)
J
JustextCrawler (class in swisstext.cmd.scraping.tools.justext_crawler)
K
key (swisstext.cmd.searching.tools.google_search.GoogleGenerator attribute)
kill_received (swisstext.cmd.scraping.pipeline.PipelineWorker attribute)
kwargs (swisstext.cmd.scraping.tools.basic_seed_creator.IdfSeedCreator attribute)
(swisstext.cmd.scraping.tools.norm_punc.Normalizer attribute)
L
label (swisstext.mongo.abstract.sentences.DialectEntry attribute)
(swisstext.mongo.abstract.sentences.DialectInfo attribute)
labels (swisstext.mongo.abstract.sentences.DialectInfo attribute)
langs (swisstext.cmd.scraping.tools.mocy_splitter.MocySplitter attribute)
link_exists() (swisstext.cmd.searching.interfaces.ISaver method)
(swisstext.cmd.searching.tools.mongo_saver.MongoSaver method)
links (swisstext.cmd.scraping.interfaces.ICrawler.CrawlResults attribute)
load_nb_prefixes() (swisstext.cmd.scraping.tools.mocy_splitter.MocySplitter class method)
(swisstext.cmd.scraping.tools.moses_splitter.MosesSplitter class method)
lock (swisstext.cmd.scraping.page_queue.PageQueue attribute)
M
mark_deleted() (swisstext.mongo.abstract.seeds.AbstractMongoSeed class method)
(swisstext.mongo.abstract.sentences.AbstractMongoSentence class method)
max_fetches (swisstext.cmd.searching.config.Config.Options attribute)
max_results (swisstext.cmd.searching.config.Config.Options attribute)
merge_dicts() (swisstext.cmd.base_config.BaseConfig class method)
min_proba (swisstext.cmd.scraping.config.Config.Options attribute)
min_ratio (swisstext.cmd.scraping.tools.basic_decider.BasicDecider attribute)
min_recrawl_delta (swisstext.cmd.scraping.tools.basic_decider.BasicDecider attribute)
MocySplitter (class in swisstext.cmd.scraping.tools.mocy_splitter)
MongoBlacklist (class in swisstext.mongo.models)
MongoBlacklist.DoesNotExist
MongoBlacklist.MultipleObjectsReturned
MongoSaver (class in swisstext.cmd.scraping.tools.mongo_saver)
(class in swisstext.cmd.searching.tools.mongo_saver)
MongoSeed (class in swisstext.mongo.models)
MongoSeed.DoesNotExist
MongoSeed.MultipleObjectsReturned
MongoSentence (class in swisstext.mongo.models)
MongoSentence.DoesNotExist
MongoSentence.MultipleObjectsReturned
MongoText (class in swisstext.mongo.models)
MongoText.DoesNotExist
MongoText.MultipleObjectsReturned
MongoURL (class in swisstext.mongo.models)
MongoURL.DoesNotExist
MongoURL.MultipleObjectsReturned
MongoUser (class in swisstext.mongo.models)
MongoUser.DoesNotExist
MongoUser.MultipleObjectsReturned
more (swisstext.cmd.scraping.tools.mocy_splitter.MocySplitter attribute)
MosesSplitter (class in swisstext.cmd.scraping.tools.moses_splitter)
N
nb_prefixes (swisstext.cmd.scraping.tools.mocy_splitter.MocySplitter attribute)
new_links (swisstext.cmd.searching.data.Seed attribute)
new_sg (swisstext.cmd.scraping.data.Page attribute)
new_urls (swisstext.cmd.searching.pipeline.SearchEngine attribute)
next() (swisstext.cmd.searching.tools.google_search.GoogleGenerator method)
(swisstext.cmd.searching.tools.start_page.StartPageGenerator method)
ngram_range (swisstext.cmd.scraping.tools.basic_seed_creator.BasicSeedCreator attribute)
normalize() (swisstext.cmd.scraping.interfaces.INormalizer method)
(swisstext.cmd.scraping.tools.norm_punc.Normalizer method)
normalize_all() (swisstext.cmd.scraping.interfaces.INormalizer method)
normalize_text() (in module swisstext.cmd.scraping.tools.norm_punc)
Normalizer (class in swisstext.cmd.scraping.tools.norm_punc)
NOT_EXIST (swisstext.cmd.searching.interfaces.ISaver.LinkStatus attribute)
num_workers (swisstext.cmd.scraping.config.Config.Options attribute)
O
OneNewSgDecider (class in swisstext.cmd.scraping.tools.basic_decider)
OnlyNewDecider (class in swisstext.cmd.scraping.tools.basic_decider)
P
Page (class in swisstext.cmd.scraping.data)
PageQueue (class in swisstext.cmd.scraping.page_queue)
PageScore (class in swisstext.cmd.scraping.data)
parent_url (swisstext.cmd.scraping.data.Page attribute)
password (swisstext.mongo.abstract.users.AbstractMongoUser attribute)
PatternSentenceFilter (class in swisstext.cmd.scraping.tools.pattern_sentence_filter)
Pipeline (class in swisstext.cmd.scraping.pipeline)
PipelineWorker (class in swisstext.cmd.scraping.pipeline)
predict() (swisstext.cmd.scraping.interfaces.ISgDetector method)
(swisstext.cmd.scraping.tools.swigspot_langid.SwigspotLangid method)
predict_lang() (swisstext.cmd.scraping.tools.swigspot_langid.SwigspotLangid method)
predict_one() (swisstext.cmd.scraping.interfaces.ISgDetector method)
prepare() (swisstext.cmd.searching.interfaces.IQueryBuilder method)
proba (swisstext.cmd.scraping.data.Sentence attribute)
process() (swisstext.cmd.searching.pipeline.SearchEngine method)
process_one() (swisstext.cmd.searching.pipeline.SearchEngine method)
PunktSplitter (class in swisstext.cmd.scraping.tools.punkt_splitter)
Q
query (swisstext.cmd.searching.data.Seed attribute)
R
remove_label() (swisstext.mongo.abstract.sentences.AbstractMongoSentence class method)
(swisstext.mongo.abstract.sentences.DialectInfo method)
roles (swisstext.mongo.abstract.users.AbstractMongoUser attribute)
run() (swisstext.cmd.scraping.pipeline.PipelineWorker method)
S
sanitize (swisstext.cmd.scraping.tools.basic_seed_creator.IdfSeedCreator attribute)
sanitize() (swisstext.cmd.scraping.tools.swigspot_langid.SwigspotLangid method)
save_page() (swisstext.cmd.scraping.interfaces.ISaver method)
(swisstext.cmd.scraping.tools.console_saver.ConsoleSaver method)
(swisstext.cmd.scraping.tools.mongo_saver.MongoSaver method)
save_seed() (swisstext.cmd.scraping.interfaces.ISaver method)
(swisstext.cmd.scraping.tools.console_saver.ConsoleSaver method)
(swisstext.cmd.scraping.tools.mongo_saver.MongoSaver method)
(swisstext.cmd.searching.interfaces.ISaver method)
(swisstext.cmd.searching.tools.console_saver.ConsoleSaver method)
(swisstext.cmd.searching.tools.mongo_saver.MongoSaver method)
save_seeds() (swisstext.cmd.scraping.interfaces.ISaver method)
save_url() (swisstext.cmd.scraping.interfaces.ISaver method)
(swisstext.cmd.scraping.tools.console_saver.ConsoleSaver method)
(swisstext.cmd.scraping.tools.mongo_saver.MongoSaver method)
score (swisstext.cmd.scraping.data.Page attribute)
search() (swisstext.cmd.searching.interfaces.ISearcher method)
(swisstext.cmd.searching.tools.google_search.GoogleGeneratorFactory method)
(swisstext.cmd.searching.tools.start_page.StartPageGeneratorFactory method)
search_history (swisstext.mongo.abstract.seeds.AbstractMongoSeed attribute)
SearchEngine (class in swisstext.cmd.searching.pipeline)
Seed (class in swisstext.cmd.searching.data)
SEED (swisstext.mongo.abstract.generic.SourceType attribute)
seed_exists() (swisstext.cmd.searching.interfaces.ISaver method)
(swisstext.cmd.searching.tools.mongo_saver.MongoSaver method)
SEEDSFILE
st_search-from_file command line option
Sentence (class in swisstext.cmd.scraping.data)
sentence_count (swisstext.cmd.scraping.data.Page attribute)
sentence_exists() (swisstext.cmd.scraping.interfaces.ISaver method)
(swisstext.cmd.scraping.tools.console_saver.ConsoleSaver method)
(swisstext.cmd.scraping.tools.mongo_saver.MongoSaver method)
sents_count (swisstext.mongo.abstract.urls.UrlCrawlMeta attribute)
set() (swisstext.cmd.base_config.BaseConfig method)
sg_count (swisstext.cmd.scraping.data.Page attribute)
sg_sents_count (swisstext.mongo.abstract.urls.UrlCrawlMeta attribute)
should_children_be_crawled() (swisstext.cmd.scraping.interfaces.IDecider method)
(swisstext.cmd.scraping.tools.basic_decider.BasicDecider method)
(swisstext.cmd.scraping.tools.basic_decider.OneNewSgDecider method)
should_page_be_crawled() (swisstext.cmd.scraping.interfaces.IDecider method)
(swisstext.cmd.scraping.tools.basic_decider.BasicDecider method)
(swisstext.cmd.scraping.tools.basic_decider.OnlyNewDecider method)
should_url_be_blacklisted() (swisstext.cmd.scraping.interfaces.IDecider method)
(swisstext.cmd.scraping.tools.basic_decider.BasicDecider method)
skip() (swisstext.mongo.abstract.sentences.DialectInfo method)
skipped_by (swisstext.mongo.abstract.sentences.DialectInfo attribute)
Source (class in swisstext.mongo.abstract.generic)
source (swisstext.mongo.abstract.seeds.AbstractMongoSeed attribute)
(swisstext.mongo.abstract.urls.AbstractMongoBlacklist attribute)
(swisstext.mongo.abstract.urls.AbstractMongoURL attribute)
SourceType (class in swisstext.mongo.abstract.generic)
split() (swisstext.cmd.scraping.interfaces.ISplitter method)
(swisstext.cmd.scraping.tools.mocy_splitter.MocySplitter method)
(swisstext.cmd.scraping.tools.moses_splitter.MosesSplitter method)
(swisstext.cmd.scraping.tools.punkt_splitter.PunktSplitter method)
split_all() (swisstext.cmd.scraping.interfaces.ISplitter method)
split_paragraph() (swisstext.cmd.scraping.tools.mocy_splitter.MocySplitter class method)
(swisstext.cmd.scraping.tools.moses_splitter.MosesSplitter class method)
split_sentences() (swisstext.cmd.scraping.tools.mocy_splitter.MocySplitter method)
split_text() (swisstext.cmd.scraping.tools.mocy_splitter.MocySplitter method)
st_alswiki command line option
-l, --log-level <log_level>
st_alswiki-download command line option
-d, --dir <dir>
st_alswiki-parse command line option
-m, --min-chars <min_chars>
DUMPFILE
st_alswiki-process command line option
-c, --config-path <config_path>
-d, --db <db>
GENSIMFILE
st_alswiki-txt command line option
-c, --config-path <config_path>
-f, --format <format>
-p, --min-proba <min_proba>
TEXTFILE
st_scrape command line option
-c, --config-path <config_path>
-d, --db <db>
-l, --log-level <log_level>
st_scrape-dump_config command line option
-t, --test
st_scrape-from_file command line option
URLFILE
st_scrape-from_mongo command line option
--how <how>
--what <what>
-n, --num-urls <num_urls>
st_scrape-gen_seeds command line option
--new, --any
-c, --confirm
-n, --num <num>
-s, --num-sentences <num_sentences>
st_search command line option
-c, --config-path <config_path>
-d, --db <db>
-l, --log-level <log_level>
st_search-dump_config command line option
-t, --test
st_search-from_file command line option
--no-search
SEEDSFILE
st_search-from_mongo command line option
--new, --any
-n, --num-seeds <num_seeds>
StartPageGenerator (class in swisstext.cmd.searching.tools.start_page)
StartPageGeneratorFactory (class in swisstext.cmd.searching.tools.start_page)
SwigspotLangid (class in swisstext.cmd.scraping.tools.swigspot_langid)
swisstext.alswiki (module)
swisstext.cmd (module)
swisstext.cmd.base_config (module)
swisstext.cmd.link_utils (module)
swisstext.cmd.scraping (module)
swisstext.cmd.scraping.config (module)
swisstext.cmd.scraping.data (module)
swisstext.cmd.scraping.interfaces (module)
swisstext.cmd.scraping.page_queue (module)
swisstext.cmd.scraping.pipeline (module)
swisstext.cmd.scraping.tools (module)
swisstext.cmd.scraping.tools.basic_decider (module)
swisstext.cmd.scraping.tools.basic_seed_creator (module)
swisstext.cmd.scraping.tools.bs_crawler (module)
swisstext.cmd.scraping.tools.console_saver (module)
swisstext.cmd.scraping.tools.justext_crawler (module)
swisstext.cmd.scraping.tools.mocy_splitter (module)
swisstext.cmd.scraping.tools.mongo_saver (module)
swisstext.cmd.scraping.tools.moses_splitter (module)
swisstext.cmd.scraping.tools.norm_punc (module)
swisstext.cmd.scraping.tools.pattern_sentence_filter (module)
swisstext.cmd.scraping.tools.punkt_splitter (module)
swisstext.cmd.scraping.tools.swigspot_langid (module)
swisstext.cmd.searching (module)
swisstext.cmd.searching.config (module)
swisstext.cmd.searching.data (module)
swisstext.cmd.searching.interfaces (module)
swisstext.cmd.searching.pipeline (module)
swisstext.cmd.searching.tools (module)
swisstext.cmd.searching.tools.console_saver (module)
swisstext.cmd.searching.tools.google_search (module)
swisstext.cmd.searching.tools.mongo_saver (module)
swisstext.cmd.searching.tools.start_page (module)
swisstext.mongo (module)
swisstext.mongo.abstract (module)
swisstext.mongo.abstract.generic (module)
swisstext.mongo.abstract.seeds (module)
swisstext.mongo.abstract.sentences (module)
swisstext.mongo.abstract.urls (module)
swisstext.mongo.abstract.users (module)
swisstext.mongo.models (module)
T
text (swisstext.cmd.scraping.data.Page attribute)
(swisstext.cmd.scraping.data.Sentence attribute)
(swisstext.cmd.scraping.interfaces.ICrawler.CrawlResults attribute)
(swisstext.mongo.abstract.sentences.AbstractMongoSentence attribute)
TEXTFILE
st_alswiki-txt command line option
tool_entry_name() (swisstext.cmd.base_config.BaseConfig property)
(swisstext.cmd.scraping.config.Config property)
(swisstext.cmd.searching.config.Config property)
top_results() (swisstext.cmd.searching.interfaces.ISearcher method)
try_delete() (swisstext.mongo.abstract.urls.AbstractMongoURL class method)
type_ (swisstext.mongo.abstract.generic.Source attribute)
U
UNKNOWN (swisstext.mongo.abstract.generic.SourceType attribute)
unmark_deleted() (swisstext.mongo.abstract.seeds.AbstractMongoSeed class method)
(swisstext.mongo.abstract.sentences.AbstractMongoSentence class method)
unskip() (swisstext.mongo.abstract.sentences.DialectInfo method)
url (swisstext.cmd.scraping.data.Page attribute)
(swisstext.mongo.abstract.sentences.AbstractMongoSentence attribute)
(swisstext.mongo.abstract.urls.AbstractMongoBlacklist attribute)
(swisstext.mongo.abstract.urls.AbstractMongoURL attribute)
url_id() (swisstext.mongo.abstract.sentences.AbstractMongoSentence property)
url_to_filename() (swisstext.cmd.scraping.tools.mongo_saver.MongoSaver static method)
UrlCrawlMeta (class in swisstext.mongo.abstract.urls)
URLFILE
st_scrape-from_file command line option
USER (swisstext.mongo.abstract.generic.SourceType attribute)
user (swisstext.mongo.abstract.sentences.DialectEntry attribute)
USER (swisstext.mongo.abstract.users.UserRoles attribute)
UserRoles (class in swisstext.mongo.abstract.users)
V
valid_tool_entries() (swisstext.cmd.base_config.BaseConfig property)
(swisstext.cmd.scraping.config.Config property)
(swisstext.cmd.searching.config.Config property)
validated_by (swisstext.mongo.abstract.sentences.AbstractMongoSentence attribute)