1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
|
<?php
/**
* Contains a class for querying an external translation service.
*
* @file
* @author Niklas Laxström
* @license GPL-2.0-or-later
*/
/**
* Implements support for the Caighdean translator API.
* @see https://github.com/kscanne/caighdean/blob/master/API.md
* @ingroup TranslationWebService
* @since 2017.04
*/
class CaighdeanWebService extends TranslationWebService {
	/**
	 * Returns the type identifier of this service.
	 *
	 * @return string Always 'mt'.
	 */
	public function getType() {
		return 'mt';
	}

	/**
	 * Maps a language code to the code used by the service.
	 *
	 * No mapping is performed: the code is returned unchanged.
	 *
	 * @param string $code Language code.
	 * @return string The same code that was passed in.
	 */
	public function mapCode( $code ) {
		return $code;
	}

	/**
	 * Returns the hard-coded list of language pairs for this service.
	 *
	 * @return array Nested map with `gd` and `gv` as outer keys, each containing
	 *  `ga => true` (presumably source code => target code => true; confirm
	 *  against the parent class).
	 */
	protected function doPairs() {
		$pairs = [
			'gd' => [ 'ga' => true ],
			'gv' => [ 'ga' => true ],
		];

		return $pairs;
	}

	/**
	 * Builds the POST query for translating the given text.
	 *
	 * The trimmed text is attached to the query as processing instructions so that
	 * parseResponse() can use it to restore the original whitespace.
	 *
	 * @param string $text Text to translate. Leading and trailing whitespace is trimmed.
	 * @param string $from Source language code, sent as the `foinse` field.
	 * @param string $to Target language code. Not included in the request data.
	 * @return TranslationQuery The value returned by the query builder chain.
	 * @throws TranslationWebServiceConfigurationException If `url` is not configured.
	 * @throws TranslationWebServiceInvalidInputException If the text is empty after
	 *  trimming, or the encoded request data is longer than 16000 bytes.
	 */
	protected function getQuery( $text, $from, $to ) {
		if ( !isset( $this->config['url'] ) ) {
			throw new TranslationWebServiceConfigurationException( '`url` not set in configuration' );
		}

		$text = trim( $text );
		if ( $text === '' ) {
			throw new TranslationWebServiceInvalidInputException( 'Input is empty' );
		}

		$data = wfArrayToCgi( [
			'foinse' => $from,
			'teacs' => $text,
		] );

		// Maximum payload is 16 KiB (16384 bytes). Based on testing, 16000 bytes is safe:
		// it leaves some headroom for other things sent along with the data.
		if ( strlen( $data ) > 16000 ) {
			throw new TranslationWebServiceInvalidInputException( 'Input is over 16000 bytes long' );
		}

		return TranslationQuery::factory( $this->config['url'] )
			->timeout( $this->config['timeout'] )
			->postWithData( $data )
			->attachProcessingInstructions( $text );
	}

	/**
	 * Converts the service response into translated text.
	 *
	 * The response body is expected to be a JSON array of [ source token, target token ]
	 * pairs. The target tokens are concatenated. The whitespace between them is copied
	 * from the original text when the source token can be located in it; otherwise a
	 * single space is used.
	 *
	 * @param TranslationQueryResponse $reply Response whose query carries the original
	 *  text as processing instructions.
	 * @return string The assembled translation.
	 * @throws TranslationWebServiceException If the body is not a JSON array, or an
	 *  entry of it is not a pair of strings.
	 */
	protected function parseResponse( TranslationQueryResponse $reply ) {
		$body = $reply->getBody();
		$response = FormatJson::decode( $body );
		if ( !is_array( $response ) ) {
			throw new TranslationWebServiceException( 'Invalid json: ' . serialize( $body ) );
		}

		$text = '';
		$originalText = $reply->getQuery()->getProcessingInstructions();

		foreach ( $response as $pair ) {
			// Reject malformed entries explicitly instead of letting them surface as PHP
			// notices or type errors in the string functions below.
			if (
				!is_array( $pair ) ||
				!isset( $pair[0], $pair[1] ) ||
				!is_string( $pair[0] ) ||
				!is_string( $pair[1] )
			) {
				throw new TranslationWebServiceException(
					'Invalid response entry: ' . serialize( $pair )
				);
			}

			list( $sourceToken, $targetToken ) = $pair;

			// Fallback separator for tokens which cannot be located in the original text.
			$separator = ' ';
			$pos = strpos( $originalText, $sourceToken );

			// Try to keep the effects local. If we fail to match a token, we could
			// accidentally scan very far ahead in the text, find a false match and then
			// not find matches for all of the tokens in between. Hence only matches
			// within the first 50 bytes are accepted.
			if ( $pos !== false && $pos < 50 ) {
				// Remove the portion of text we have processed. $pos should be zero, unless
				// we failed to match something earlier.
				$originalText = substr( $originalText, $pos + strlen( $sourceToken ) );

				if ( preg_match( '/^\s+/', $originalText, $match ) ) {
					// Reuse the exact whitespace which followed the token in the original.
					$separator = $match[ 0 ];
					$originalText = substr( $originalText, strlen( $separator ) );
				} else {
					// The next token follows immediately (for example punctuation).
					$separator = '';
				}
			}

			$text .= $targetToken . $separator;
		}

		return $text;
	}
}
|