-
Notifications
You must be signed in to change notification settings - Fork 15
Expand file tree
/
Copy pathresourcesbot.py
More file actions
211 lines (179 loc) · 7.49 KB
/
Copy pathresourcesbot.py
File metadata and controls
211 lines (179 loc) · 7.49 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
"""
The ResourcesBot scans through the resources and all their translations, also retrieving
information on PDF/ODT files. It checks whether new translations were added and then performs
follow-up tasks such as updating language overview pages where necessary.
It is supposed to run daily as a cronjob.
Main steps:
1. gather data: go through all worksheets and all their translations
This will take quite some time as it is many API calls
2. Update JSON representation for every language if necessary, e.g.
https://www.4training.net/4training:de.json
This serves as a cache / "database"
3. Post-processing:
- Update language overview pages where necessary (WriteList)
for example: https://www.4training.net/German#Available_training_resources_in_German
- Update language reports (WriteReport)
for example: https://www.4training.net/4training:German
- WriteSidebarMessages
- Export worksheets in HTML format to a repository (ExportHTML)
- Push the local repository to origin (ExportRepository)
Not yet implemented:
- update a zip file with all worksheets of a language
- send email notifications on updates to mailing lists of the corresponding language
Command line options:
--lang LANGUAGECODE: only look at this one language (significantly faster)
-l, --loglevel: change logging level (default: warning; other options: debug, info, error)
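-m MODULE [MODULE ...]: only run the selected modules (default: all modules, or the
modules= setting in the [resourcesbot] section of config.ini)
--rewrite: Force rewriting of one component or all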
--read-from-cache: Read from the JSON structure instead of querying the current
status of all worksheets
--simulate: Dry-run without login, MediaWiki writes, git push, or local exports;
shows pywikibot diffs for wiki page changes (pair with --read-from-cache for speed)
Logging:
If configured in config.ini (see config.example.ini), output will be logged to three
different files in three different verbosity levels (WARNING, INFO, DEBUG)
Reports:
We write language reports into the folder specified in config.ini
(section Paths, variable languagereports)
Examples:
Only update German language information page with more logging
python3 resourcesbot.py --lang de -l info
Quickly rewrite German exported HTML files
python3 resourcesbot.py --read-from-cache --lang de --rewrite html
Normal run (updating language information pages where necessary)
python3 resourcesbot.py
Run without making changes (best for understanding what the script does):
python3 resourcesbot.py --simulate -l info
Pair with --read-from-cache for a faster dry-run:
python3 resourcesbot.py --simulate --read-from-cache -l info
This is only the wrapper script, all main logic is in resourcesbot/bot.py
"""
import argparse
import logging
import os
import sys
import traceback
from configparser import ConfigParser
from typing import List
from pywikitools.resourcesbot.bot import (
ResourcesBot,
build_module_choices,
load_module,
resolve_run_modules,
)
def parse_arguments() -> ResourcesBot:
    """
    Build the command-line interface and create the bot from the given options.

    Besides parsing sys.argv this reads config.ini from the directory of this
    script and configures logging via set_loglevel() before the bot is created.

    @return: ResourcesBot instance set up with the configuration, the parsed
        command-line options and the modules selected to run
    """
    available_modules = build_module_choices()
    level_names: List[str] = ["debug", "info", "warning", "error"]
    rewritable: List[str] = ["all", "json", "summary"]

    # Collect the help text for -m piece by piece; every module that supports
    # rewriting additionally becomes a valid value for --rewrite.
    help_parts: List[str] = ["Select the modules to be run. Available options are:\n"]
    for abbr, selected_module in available_modules.items():
        module = load_module(selected_module)
        help_parts.append(f" - {abbr}: {module.help_summary()}\n")
        if module.can_be_rewritten():
            rewritable.append(abbr)
    help_parts.append(
        "Default: all modules, or set [resourcesbot] modules= in config.ini "
        "(overridden by -m)"
    )

    cli = argparse.ArgumentParser(
        prog="python | python3 resourcesbot.py",
        description=(
            "Update list of available training resources in the"
            " language information pages."
        ),
        formatter_class=argparse.RawTextHelpFormatter,
    )
    cli.add_argument(
        "--read-from-cache",
        action="store_true",
        help="Read results from json cache from the server",
    )
    cli.add_argument("--lang", help="Process only one language (ISO 639-1 code)")
    cli.add_argument(
        "-m",
        nargs="+",
        choices=available_modules.keys(),
        help="".join(help_parts),
    )
    cli.add_argument(
        "--rewrite",
        choices=rewritable,
        help="Force rewriting of one component or all.",
    )
    cli.add_argument(
        "-l",
        "--loglevel",
        choices=level_names,
        default="warning",
        help="Set loglevel for the script",
    )
    cli.add_argument(
        "--simulate",
        action="store_true",
        help="Dry-run: show diffs, skip login, writes, and export modules.",
    )
    options = cli.parse_args()

    # config.ini is expected next to this script
    script_dir = os.path.dirname(os.path.abspath(__file__))
    config = ConfigParser()
    config.read(f"{script_dir}/config.ini")

    # Translate the level name (restricted by argparse choices) to its number
    numeric_level = getattr(logging, options.loglevel.upper(), None)
    assert isinstance(numeric_level, int)
    set_loglevel(config, numeric_level)

    return ResourcesBot(
        config=config,
        read_from_cache=options.read_from_cache,
        limit_to_lang=None if options.lang is None else str(options.lang),
        modules=resolve_run_modules(available_modules, config, options.m),
        rewrite=options.rewrite,
        simulate=options.simulate,
    )
def set_loglevel(config: ConfigParser, loglevel: int) -> None:
    """
    Set up logging to stdout and to up to three log files.

    The root logger and the "pywikitools.resourcesbot" logger are set to DEBUG
    so that every handler can filter by its own level. A handler writing to
    stdout is always added. For each of the options logfile, infologfile and
    debuglogfile present in the [resourcesbot] section of the configuration a
    file handler with the level WARNING, INFO and DEBUG respectively is added.
    The file names are resolved relative to the directory given in section
    Paths, option logs; if that is missing or empty, a warning is logged and
    the names are used as given.

    @param config: Configuration providing the log directory and file names.
    @param loglevel: Level for the stdout handler: logging.WARNING is standard,
        logging.INFO for details, logging.DEBUG for a lot of output.
    """
    root = logging.getLogger()
    root.setLevel(logging.DEBUG)
    # The following is necessary so that debug messages go to debuglogfile
    logging.getLogger("pywikitools.resourcesbot").setLevel(logging.DEBUG)

    formatter = logging.Formatter("%(asctime)s %(name)s %(levelname)s: %(message)s")
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setLevel(loglevel)
    stdout_handler.setFormatter(formatter)
    root.addHandler(stdout_handler)

    log_path = config.get("Paths", "logs", fallback="")
    if log_path == "":
        # Emitted before the file handlers exist, so this only reaches stdout
        root.warning(
            "No log directory specified in configuration."
            " Using current working directory"
        )

    # Logging output to files with different verbosity
    file_levels = (
        ("logfile", logging.WARNING),
        ("infologfile", logging.INFO),
        ("debuglogfile", logging.DEBUG),
    )
    for option, level in file_levels:
        if not config.has_option("resourcesbot", option):
            continue
        # os.path.join instead of plain concatenation: a log directory that is
        # configured without a trailing separator must still contain the file
        # (an empty log_path leaves the file name unchanged).
        file_name = os.path.join(log_path, config["resourcesbot"][option])
        file_handler = logging.FileHandler(file_name)
        file_handler.setLevel(level)
        file_handler.setFormatter(formatter)
        root.addHandler(file_handler)
if __name__ == "__main__":
    # Script entry point: build the bot from the command line and run it.
    try:
        resourcesbot = parse_arguments()
        resourcesbot.run()
    except Exception as e:
        # Top-level boundary: record the failure including the traceback in
        # the configured logs.
        logging.error(f"Exiting because of uncaught exception: {e}")
        logging.error(traceback.format_exc())
        # Exit with a non-zero status so that the caller (e.g. cron or a
        # wrapper script) can tell that the run failed.
        sys.exit(1)