Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions src/mips/common/util/gen-sjis-encode-table.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/usr/bin/env python3
# Regenerates sjis-encode-table.h: the inverse of sjis-table.h (Unicode -> Shift-JIS).
# The forward table is indexed by a remapped Shift-JIS codepoint (single bytes at
# 0x000-0x0ff; lead 0x80-0x8f at base 0x100; lead 0x90-0x9f at base 0x1100;
# lead 0xe0-0xef at base 0x2100; index = base + ((lead & 0xf) << 8) + trail).
# We walk every forward entry, reconstruct its Shift-JIS bytes, and keep the
# lowest Shift-JIS code per Unicode value (so single-byte forms win over
# double-byte). Run from the repo root.
import re, sys

# Load the forward (Shift-JIS -> Unicode) table: every 4-hex-digit literal in
# sjis-table.h, in file order, so the list position is the remapped table index.
# The file is opened in a `with` block so the handle is closed deterministically
# instead of being left for the garbage collector.
with open('src/mips/common/util/sjis-table.h') as table_file:
    fwd = [int(x, 16) for x in
           re.findall(r'0x[0-9a-fA-F]{4}', table_file.read())]

# Convert a forward-table index back into the Shift-JIS code it stands for.
# Indices below 0x100 are single-byte codes and are returned unchanged.
# Larger indices fall into one of three ranges, each with its own base
# (0x100, 0x1100, 0x2100) and lead-byte high nibble (0x80, 0x90, 0xe0).
# The closing statements of this function (further down in this view, after
# interleaved review text) subtract the base and pack lead and trail bytes
# into one 16-bit value, lead byte in the high 8 bits.
def index_to_sjis(index):
if index < 0x100:
return index
# Pick the base offset and lead-byte high nibble for the range holding index.
if index < 0x1100: base, lead_hi = 0x100, 0x80
elif index < 0x2100: base, lead_hi = 0x1100, 0x90
else: base, lead_hi = 0x2100, 0xe0
Comment on lines +17 to +19

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

📐 Maintainability & Code Quality | 🟡 Minor | ⚡ Quick win

Resolve Ruff E701/E702 inline-statement violations.

Line 17, Line 18, Line 19, and Line 46 use multi-statements on one line and currently fail the configured lint rules.

💡 Suggested cleanup
 def index_to_sjis(index):
     if index < 0x100:
         return index
-    if index < 0x1100:   base, lead_hi = 0x100,  0x80
-    elif index < 0x2100: base, lead_hi = 0x1100, 0x90
-    else:                base, lead_hi = 0x2100, 0xe0
+    if index < 0x1100:
+        base, lead_hi = 0x100, 0x80
+    elif index < 0x2100:
+        base, lead_hi = 0x1100, 0x90
+    else:
+        base, lead_hi = 0x2100, 0xe0
@@
         tok = "{0x%04x, 0x%04x}, " % (uni, sjis)
         if len(line) + len(tok) > 116:
-            f.write(line.rstrip() + "\n"); line = "    "
+            f.write(line.rstrip() + "\n")
+            line = "    "

Also applies to: 46-46

🧰 Tools
🪛 Ruff (0.15.18)

[error] 17-17: Multiple statements on one line (colon)

(E701)


[error] 18-18: Multiple statements on one line (colon)

(E701)


[error] 19-19: Multiple statements on one line (colon)

(E701)

🤖 Prompt for AI Agents
Verify each finding against current code. Fix only still-valid issues, skip the
rest with a brief reason, keep changes minimal, and validate.

In `@src/mips/common/util/gen-sjis-encode-table.py` around lines 17 - 19, The if,
elif, and else statements on lines 17-19 contain multiple statements on a single
line (condition and assignment together), violating Ruff E701/E702 rules. Split
each statement by moving the assignment of base and lead_hi to a new line below
the if/elif/else condition, so each line contains only one statement. Apply the
same fix to line 46 which has a similar multi-statement violation.

Source: Linters/SAST tools

r = index - base
return ((lead_hi | (r >> 8)) << 8) | (r & 0xff)

# Build the inverse map (Unicode -> Shift-JIS). The forward table is walked in
# index order and setdefault() keeps the first code seen for each Unicode value.
rev = {}
for index, uni in enumerate(fwd):
    sjis = index_to_sjis(index)
    if sjis < 0x100:
        # Single-byte codes are always recorded.
        rev.setdefault(uni, sjis)
        continue
    if uni == 0x0020:
        # Unmapped filler cells decode to space; skip them.
        continue
    trail = sjis & 0xff
    # Keep only double-byte codes whose trail byte is in 0x40-0x7e or 0x80-0xfc.
    if 0x40 <= trail <= 0x7e or 0x80 <= trail <= 0xfc:
        rev.setdefault(uni, sjis)

# Emit sjis-encode-table.h: the (unicode, sjis) pairs sorted by Unicode value,
# packed several per line inside a C array initializer.
# NOTE(review): whitespace inside the string literals below is reproduced as it
# appears in this view; confirm the indentation widths against the checked-in
# script before regenerating the header.
items = sorted(rev.items())
with open('src/mips/common/util/sjis-encode-table.h', 'w') as f:
    f.write("#pragma once\n\n#include <stdint.h>\n\n")
    f.write("// Generated by gen-sjis-encode-table.py: the inverse of sjis-table.h.\n")
    f.write("// Each index maps a Unicode codepoint to its Shift-JIS encoding. The list is\n")
    f.write("// sorted by Unicode value for binary search. A Shift-JIS value <= 0xff is a\n")
    f.write("// single byte; otherwise it is two bytes, high byte first.\n\n")
    f.write("static const struct {\n uint16_t unicode;\n uint16_t sjis;\n} c_unicodeToSjisConvTable[] = {\n")
    line = " "
    for uni, sjis in items:
        tok = "{0x%04x, 0x%04x}, " % (uni, sjis)
        # Flush the pending line before it would grow past 116 characters.
        if len(line) + len(tok) > 116:
            f.write(line.rstrip() + "\n")
            line = " "
        line += tok
    # Write whatever is left over from the final, partially filled line.
    if line.strip():
        f.write(line.rstrip() + "\n")
    f.write("};\n")
print("emitted %d entries" % len(items), file=sys.stderr)
1,193 changes: 1,193 additions & 0 deletions src/mips/common/util/sjis-encode-table.h

Large diffs are not rendered by default.

115 changes: 115 additions & 0 deletions src/mips/common/util/sjis-encode.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
#pragma once

#include <stdint.h>

#include "common/util/sjis-encode-table.h"

// Unicode -> Shift-JIS encoding, the inverse of support/sjis_conv.cc's decoder.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe it would be good to add an EASTL version of sjis_conv.h/.cc to psyqo to make it possible to display/printf obtained filenames?

Right now you can't use it as-is, because sjis_conv.h/.cc use a standard library header (its name was stripped from this text) which is not available in freestanding mode.

// Shares the same data as the forward table (sjis-table.h), inverted into
// sjis-encode-table.h. Usable from both the host and the MIPS targets.

namespace Sjis {

// Looks up the Shift-JIS encoding of a Unicode codepoint by binary search over
// c_unicodeToSjisConvTable, which must be sorted by its unicode field.
// Returns the matching entry's sjis field, or 0 if no entry matches. A result
// <= 0xff is a single byte; otherwise it is two bytes, high byte first.
// (U+0000 also returns 0, which is harmless for string use.)
static inline uint16_t unicodeToSjis(uint16_t unicode) {
    const unsigned count = sizeof(c_unicodeToSjisConvTable) / sizeof(c_unicodeToSjisConvTable[0]);
    // Half-open search window: remaining candidates are [first, last).
    unsigned first = 0;
    unsigned last = count;
    while (first < last) {
        const unsigned middle = (first + last) / 2;
        const uint16_t candidate = c_unicodeToSjisConvTable[middle].unicode;
        if (candidate < unicode) {
            first = middle + 1;
        } else if (candidate > unicode) {
            last = middle;
        } else {
            return c_unicodeToSjisConvTable[middle].sjis;
        }
    }
    return 0;
}

// Decodes one UTF-8 codepoint from str starting at *index and advances *index
// past the bytes it consumed. The caller must ensure *index < length on entry;
// the lead byte is read without a bounds check.
//
// Returns the codepoint, or 0xfffd (replacement) when:
//  - the lead byte is a stray continuation byte or an invalid lead (one byte
//    is consumed);
//  - a continuation byte is missing or malformed (*index is left on the
//    offending position so decoding can resume there);
//  - the sequence is an overlong encoding (e.g. C0 80), which is invalid UTF-8;
//  - the sequence is a 4-byte form, i.e. a codepoint above the BMP, which
//    Shift-JIS cannot represent. The whole sequence is consumed so that one
//    such codepoint produces exactly one replacement.
static inline uint16_t utf8Decode(const char* str, uint32_t length, uint32_t* index) {
    uint32_t i = *index;
    uint8_t c = str[i++];
    uint32_t cp;
    uint32_t minimum;  // smallest codepoint that legitimately needs this sequence length
    unsigned extra;    // number of continuation bytes expected after the lead byte
    if (c < 0x80) {
        *index = i;
        return c;
    } else if ((c & 0xe0) == 0xc0) {
        cp = c & 0x1f;
        extra = 1;
        minimum = 0x80;
    } else if ((c & 0xf0) == 0xe0) {
        cp = c & 0x0f;
        extra = 2;
        minimum = 0x800;
    } else if ((c & 0xf8) == 0xf0) {
        cp = c & 0x07;
        extra = 3;
        minimum = 0x10000;
    } else {
        *index = i;
        return 0xfffd;
    }
    for (unsigned k = 0; k < extra; k++) {
        if (i >= length || (str[i] & 0xc0) != 0x80) {
            *index = i;
            return 0xfffd;
        }
        cp = (cp << 6) | (str[i++] & 0x3f);
    }
    *index = i;
    // Reject overlong forms: the value must actually require this many bytes.
    if (cp < minimum) return 0xfffd;
    return cp > 0xffff ? 0xfffd : (uint16_t)cp;
}

// Converts a NUL-terminated UTF-8 string into Shift-JIS bytes stored in dst.
// At most dstSize bytes are written and the count written is returned. A
// codepoint with no Shift-JIS mapping becomes '?'. A two-byte code that would
// not fit entirely in the remaining space ends the conversion. No NUL
// terminator is appended to the output.
static inline uint32_t utf8ToSjis(uint8_t* dst, uint32_t dstSize, const char* src) {
    // Measure the input up to (not including) its terminating NUL.
    uint32_t length = 0;
    for (const char* p = src; *p != '\0'; p++) length++;

    uint32_t readPos = 0;
    uint32_t written = 0;
    while (readPos < length && written < dstSize) {
        const uint16_t codepoint = utf8Decode(src, length, &readPos);
        uint16_t encoded = unicodeToSjis(codepoint);
        // A zero result means "unmapped" unless the codepoint itself was zero.
        if (encoded == 0 && codepoint != 0) encoded = '?';
        if (encoded <= 0xff) {
            dst[written++] = encoded & 0xff;
            continue;
        }
        // Two-byte code: emit both bytes or neither, high byte first.
        if (written + 2 > dstSize) break;
        dst[written++] = encoded >> 8;
        dst[written++] = encoded & 0xff;
    }
    return written;
}

// Converts a NUL-terminated UTF-8 string into Shift-JIS following the BIOS
// save-title convention: printable ASCII is first promoted to its fullwidth
// form (space -> U+3000, 0x21..0x7e -> U+ff01..U+ff5e) so it renders in the
// manager's fullwidth font, while any other codepoint (e.g. Japanese) is
// encoded directly. This is what memory card save titles want. At most dstSize
// bytes are written and the count written is returned. Unmapped codepoints
// become '?'. The output is not NUL-terminated.
static inline uint32_t utf8ToSjisTitle(uint8_t* dst, uint32_t dstSize, const char* src) {
    // Measure the input up to (not including) its terminating NUL.
    uint32_t length = 0;
    for (const char* p = src; *p != '\0'; p++) length++;

    uint32_t readPos = 0;
    uint32_t written = 0;
    while (readPos < length && written < dstSize) {
        uint16_t codepoint = utf8Decode(src, length, &readPos);
        if (codepoint == 0x20) {
            codepoint = 0x3000;
        } else if (codepoint > 0x20 && codepoint < 0x7f) {
            // Shift into the fullwidth block: 0x21 -> U+ff01, ..., 0x7e -> U+ff5e.
            codepoint = (uint16_t)(0xff00 + (codepoint - 0x20));
        }
        uint16_t encoded = unicodeToSjis(codepoint);
        // A zero result means "unmapped" unless the codepoint itself was zero.
        if (encoded == 0 && codepoint != 0) encoded = '?';
        if (encoded <= 0xff) {
            dst[written++] = encoded & 0xff;
            continue;
        }
        // Two-byte code: emit both bytes or neither, high byte first.
        if (written + 2 > dstSize) break;
        dst[written++] = encoded >> 8;
        dst[written++] = encoded & 0xff;
    }
    return written;
}

} // namespace Sjis
12 changes: 12 additions & 0 deletions src/mips/psyqo/examples/memorycard/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Build description for the psyqo "memorycard" example.
# TARGET and TYPE are presumably consumed by the included psyqo.mk to name
# the output and select its format -- confirm against psyqo.mk.
TARGET = memorycard
TYPE = ps-exe

# Source files compiled into the example.
SRCS = \
memorycard.cpp \

# Treat warnings as errors only when the build is invoked with TEST=true.
ifeq ($(TEST),true)
CPPFLAGS = -Werror
endif
# The example is compiled as C++20.
CXXFLAGS = -std=c++20

# Shared psyqo build rules, located two directories up.
include ../../psyqo.mk
Loading
Loading